Exemple #1
0
    def test_diff_refs_modified_file(self):
        """Diff two commits whose manifests differ by one replaced hash key.

        The first commit stores ``files_mock`` as the manifest. The second
        commit removes one hash key and adds a ``'NewHash'`` key that maps to
        ``'7.jpg'``. The diff between both commits must report exactly one
        modified file and no added or deleted files.
        """
        repo_type = DATASETS
        entity = 'dataset-ex'
        mdpath = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
        specpath = os.path.join('vision-computer', 'images', entity)
        config_test = deepcopy(config)
        config_test['mlgit_path'] = '.ml-git'
        m = Metadata(entity, mdpath, config_test, repo_type)
        m.init()
        ensure_path_exists(os.path.join(mdpath, specpath, entity))
        manifestpath = os.path.join(mdpath, specpath, 'MANIFEST.yaml')
        shutil.copy('hdata/dataset-ex.spec',
                    os.path.join(mdpath, specpath, '{}.spec'.format(entity)))

        # First commit: the untouched mock manifest.
        yaml_save(files_mock, manifestpath)
        sha1 = m.commit(manifestpath, 'test')

        # Second commit: one hash key replaced by another one.
        files_mock_copy = deepcopy(files_mock)
        del files_mock_copy[
            'zdj7WZzR8Tw87Dx3dm76W5aehnT23GSbXbQ9qo73JgtwREGwB']
        files_mock_copy['NewHash'] = {'7.jpg'}
        yaml_save(files_mock_copy, manifestpath)
        sha2 = m.commit(manifestpath, 'test')

        added_files, deleted_files, modified_file = m.diff_refs_with_modified_files(
            entity, sha1, sha2)
        # assertEqual reports the actual length on failure, unlike
        # assertTrue(len(...) == n) which only says "False is not true".
        self.assertEqual(len(added_files), 0)
        self.assertEqual(len(deleted_files), 0)
        self.assertEqual(len(modified_file), 1)
Exemple #2
0
    def test_diff_refs_add_file(self):
        """Diff two commits whose manifests differ by one extra hash key.

        The first commit stores ``files_mock`` as the manifest. The second
        commit adds one new hash key that maps to ``'11.jpg'``. The diff
        between both commits must report exactly one added file and no
        deleted or modified files.
        """
        repo_type = DATASETS
        entity = 'dataset-ex'
        mdpath = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
        specpath = os.path.join('vision-computer', 'images', entity)
        config_test = deepcopy(config)
        config_test['mlgit_path'] = '.ml-git'
        m = Metadata(entity, mdpath, config_test, repo_type)
        m.init()
        ensure_path_exists(os.path.join(mdpath, specpath, entity))
        manifestpath = os.path.join(mdpath, specpath, 'MANIFEST.yaml')
        shutil.copy('hdata/dataset-ex.spec',
                    os.path.join(mdpath, specpath, '{}.spec'.format(entity)))

        # First commit: the untouched mock manifest.
        yaml_save(files_mock, manifestpath)
        sha1 = m.commit(manifestpath, 'test')

        # Second commit: same manifest plus one additional entry.
        files_mock_copy = deepcopy(files_mock)
        files_mock_copy[
            'zPaksM5tNewHashQ2VABPvvfC3VW6wFRTWKvFhUW5QaDx6JMoma'] = {
                '11.jpg'
            }
        yaml_save(files_mock_copy, manifestpath)
        sha2 = m.commit(manifestpath, 'test')

        added_files, deleted_files, modified_file = m.diff_refs_with_modified_files(
            entity, sha1, sha2)
        # assertEqual reports the actual length on failure, unlike
        # assertTrue(len(...) == n) which only says "False is not true".
        self.assertEqual(len(added_files), 1)
        self.assertEqual(len(deleted_files), 0)
        self.assertEqual(len(modified_file), 0)
Exemple #3
0
def clone_config_repository(url, folder, track):
    """Clone the repository at ``url`` into an empty project directory.

    The target directory is ``<cwd>/<folder>`` when ``folder`` is given and
    the current working directory otherwise.

    Returns ``False`` (after logging an error) when ``get_root_path()``
    reports an existing project, when the target directory is not empty,
    when cloning raises, or when ``check_successfully_clone`` fails.
    Returns ``True`` otherwise. When ``track`` is falsy the ``.git`` entry
    inside the cloned directory is passed to ``clear``.
    """
    git_dir = '.git'

    try:
        if get_root_path():
            log.error(output_messages['ERROR_IN_INTIALIZED_PROJECT'],
                      class_name=ADMIN_CLASS_NAME)
            return False
    except RootPathException:
        # Not inside a project yet: that is the expected situation.
        pass

    try:
        use_subfolder = folder is not None
        project_dir = os.path.join(os.getcwd(), folder) if use_subfolder else os.getcwd()
        if use_subfolder:
            ensure_path_exists(project_dir)

        # Refuse to clone on top of existing content.
        if os.listdir(project_dir):
            log.error(output_messages['ERROR_PATH_NOT_EMPTY'] % project_dir,
                      class_name=ADMIN_CLASS_NAME)
            return False
        Repo.clone_from(url, project_dir)
    except Exception as e:
        log.error(handle_clone_exception(e, folder, project_dir),
                  class_name=ADMIN_CLASS_NAME)
        return False

    if not check_successfully_clone(project_dir, git_dir):
        return False

    if not track:
        clear(os.path.join(project_dir, git_dir))

    return True
Exemple #4
0
    def donwload_folder(self, file_path, folder_id):
        """Download every file listed for ``folder_id`` into ``file_path``.

        For each entry returned by ``self.list_files_in_folder`` the target
        directory is ensured and the entry is handed to ``self.download_file``
        together with ``<file_path>/<entry name>``.
        """
        for entry in self.list_files_in_folder(folder_id):
            destination = os.path.join(file_path, entry.get('name'))
            ensure_path_exists(file_path)
            self.download_file(destination, entry)
Exemple #5
0
    def test_remote_fsck(self):
        """Run ``remote_fsck`` after deleting an object from the test bucket.

        An object is deleted from the bucket and ``self.check_delete`` is
        expected to raise ``ClientError`` for it. After ``remote_fsck`` returns
        a truthy value, loading that same object must succeed again.
        """
        testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
        object_key = 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A'
        hfspath = os.path.join(self.tmp_dir, 'objectsfs')
        ohfs = MultihashFS(hfspath)
        ohfs.put(HDATA_IMG_1)

        s3 = boto3.resource(
            's3',
            region_name='us-east-1',
            aws_access_key_id='fake_access_key',
            aws_secret_access_key='fake_secret_key',
        )

        s3.Object(testbucketname, object_key).delete()
        self.assertRaises(botocore.exceptions.ClientError,
                          self.check_delete, s3, testbucketname)
        mdpath = os.path.join(self.tmp_dir, 'metadata-test')

        dataset_spec = get_sample_spec(testbucketname)
        specpath = os.path.join(mdpath, 'vision-computing', 'images', 'dataset-ex')
        ensure_path_exists(specpath)

        # The spec is written to and later read from the very same path.
        # The original joined specpath onto itself, which only worked
        # because an absolute second component discards the first one.
        fullspecpath = os.path.join(specpath, 'dataset-ex.spec')
        yaml_save(dataset_spec, fullspecpath)
        manifestpath = os.path.join(specpath, 'MANIFEST.yaml')

        yaml_save({'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh': {'imghires.jpg'}}, manifestpath)
        spec = 'vision-computing__images__dataset-ex__5'
        c = yaml_load('hdata/config.yaml')
        r = LocalRepository(c, hfspath)
        ret = r.remote_fsck(mdpath, spec, fullspecpath, 2, True, True)
        self.assertTrue(ret)

        # load() raises if the object is still missing; otherwise it is
        # expected to return None.
        self.assertIsNone(s3.Object(testbucketname, object_key).load())
Exemple #6
0
    def test_fetch(self):
        """Fetch a sample dataset and compare the stored file names with ``hs``.

        ``hs`` is not defined in this method; it is looked up from the
        enclosing scope.
        """
        testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
        config_spec = get_sample_config_spec(testbucketname, testprofile,
                                             testregion)
        dataset_spec = get_sample_spec(testbucketname)

        mdpath = os.path.join(self.tmp_dir, 'metadata-test')
        specpath = os.path.join(mdpath, 'vision-computing', 'images',
                                'dataset-ex')
        ensure_path_exists(specpath)
        yaml_save(dataset_spec, os.path.join(specpath, 'dataset-ex.spec'))

        manifest = {
            'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh':
            {'imghires.jpg'}
        }
        yaml_save(manifest, os.path.join(specpath, 'MANIFEST.yaml'))

        objectpath = os.path.join(self.tmp_dir, 'objects-test')
        repository = LocalRepository(config_spec, objectpath)
        repository.fetch(mdpath, 'vision-computing__images__dataset-ex__5', None)

        # Collect the bare names of every file found below the object store.
        fs = {
            name
            for _root, _dirs, names in os.walk(objectpath)
            for name in names
        }

        self.assertEqual(len(hs), len(fs))
        self.assertTrue(len(hs.difference(fs)) == 0)
Exemple #7
0
    def test_05_hard_with_data_in_subpath(self):
        """Hard-reset to ``head~1`` with the data stored in a ``data`` subpath.

        Two files are committed one after the other. After the hard reset
        the first file must still exist, the second must be gone, and the
        status output must match the expected pattern.
        """
        entity = DATASETS
        entity_name = entity + '-ex'

        init_repository(entity, self)
        data_path = os.path.join(entity, entity_name, 'data')
        ensure_path_exists(data_path)
        first_file = os.path.join(data_path, 'file1')
        second_file = os.path.join(data_path, 'file2')

        # One add + commit round per file.
        for file_name, content in (('file1', '0'), ('file2', '1')):
            create_file(data_path, file_name, content, '')
            self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ADD % (entity, entity_name, '--bumpversion')))
            self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_COMMIT % (entity, entity_name, '')))

        self.assertTrue(os.path.exists(first_file))
        self.assertTrue(os.path.exists(second_file))

        expected_reset_info = output_messages['INFO_INITIALIZING_RESET'] % ('--hard', 'HEAD~1')
        reset_output = check_output(MLGIT_RESET % (entity, entity_name) + ' --hard --reference=head~1')
        self.assertIn(expected_reset_info, reset_output)

        self.assertTrue(os.path.exists(first_file))
        self.assertFalse(os.path.exists(second_file))

        status_output = check_output(MLGIT_STATUS % (entity, entity_name))
        self.assertRegex(status_output,
                         r'Changes to be committed:\n\tNew file: datasets-ex.spec\n\nUntracked files:\n\nCorrupted files')
        self._check_dir(self.dataset_tag)
    def test_10_checkout_with_unsaved_work(self):
        """Checkout of another version must report unsaved local files.

        After pushing version 3, an uncommitted file is created under
        ``folderA``. The checkout of version 2 must then print the
        discarded-local-changes error and mention that file.
        """
        entity = DATASETS
        entity_name = entity + '-ex'
        payload = '0' * 100

        init_repository(entity, self)
        self._create_new_tag(entity, 'tag1')

        entity_dir = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        with open(os.path.join(entity_dir, 'tag2'), 'wt') as tag_file:
            tag_file.write(payload)

        add_output = check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, ''))
        self.assertNotIn(ERROR_MESSAGE, add_output)
        commit_output = check_output(
            MLGIT_COMMIT % (entity, entity_name, '--version=3'))
        self.assertNotIn(ERROR_MESSAGE, commit_output)
        push_output = check_output(MLGIT_PUSH % (entity, entity_name))
        self.assertNotIn(ERROR_MESSAGE, push_output)

        # Local work that is never added or committed.
        unsaved_file_dir = os.path.join(entity_dir, 'folderA')
        ensure_path_exists(unsaved_file_dir)
        unsaved_file = os.path.join(unsaved_file_dir, 'test-unsaved-file')
        with open(unsaved_file, 'wt') as unsaved:
            unsaved.write(payload)

        output_command = check_output(
            MLGIT_CHECKOUT % (DATASETS, DATASET_NAME + ' --version=2'))
        self.assertIn(output_messages['ERROR_DISCARDED_LOCAL_CHANGES'],
                      output_command)
        self.assertIn('test-unsaved-file', output_command)
Exemple #9
0
    def test_get_metrics(self):
        """``_get_metrics`` must render the spec metrics as a two-column table.

        A dataset spec is copied, its dataset section is moved under the model
        key, two metrics are added, and the result is committed and tagged.
        The returned string is compared with a locally built ``PrettyTable``.
        """
        repo_type = MODELS
        entity = 'model-ex'
        specpath = os.path.join('vision-computer', 'images')
        entity_dir = os.path.join(self.test_dir, 'mdata', repo_type,
                                  'metadata', specpath, entity)

        m = Metadata(entity, self.test_dir, config, repo_type)
        m.init()
        ensure_path_exists(entity_dir)
        spec_metadata_path = os.path.join(entity_dir, 'model-ex.spec')
        shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

        # Turn the copied dataset spec into a model spec carrying metrics.
        spec_file = yaml_load(spec_metadata_path)
        spec_file[MODEL_SPEC_KEY] = deepcopy(spec_file[DATASET_SPEC_KEY])
        del spec_file[DATASET_SPEC_KEY]
        spec_file[MODEL_SPEC_KEY]['metrics'] = {'metric_1': 0, 'metric_2': 1}
        yaml_save(spec_file, spec_metadata_path)

        sha = m.commit(spec_metadata_path, specpath)
        m.tag_add('vision-computer__images__model-ex__1')

        metrics = m._get_metrics(entity, sha)

        test_table = PrettyTable()
        test_table.field_names = ['Name', 'Value']
        for column in ('Name', 'Value'):
            test_table.align[column] = 'l'
        for row in (['metric_1', 0], ['metric_2', 1]):
            test_table.add_row(row)
        test_metrics = '\nmetrics:\n{}'.format(test_table.get_string())

        self.assertEqual(metrics, test_metrics)
Exemple #10
0
def move_entity_to_dir(tmp_dir, artifact_name, entity_type):
    """Move an entity workspace into a ``folderA`` directory next to it.

    Returns a tuple ``(entity_dir, workspace, workspace_with_dir)`` where
    ``entity_dir`` is ``'folderA'``, ``workspace`` is the original path
    ``<tmp_dir>/<entity_type>/<artifact_name>`` and ``workspace_with_dir`` is
    ``<tmp_dir>/<entity_type>/folderA``.
    """
    entity_dir = 'folderA'
    entity_root = os.path.join(tmp_dir, entity_type)
    workspace = os.path.join(entity_root, artifact_name)
    workspace_with_dir = os.path.join(entity_root, entity_dir)

    ensure_path_exists(workspace_with_dir)
    shutil.move(workspace, workspace_with_dir)
    return entity_dir, workspace, workspace_with_dir
Exemple #11
0
    def test_05_hard_with_data_in_subpath(self):
        """Hard-reset to ``head~1`` with the data stored in a ``data`` subpath.

        Two files are committed one after the other. After the hard reset
        the first file must still exist, the second must be gone, and the
        status output must match the unpublished-commit, push-info and
        "changes to be committed" patterns.
        """
        entity = DATASETS
        entity_name = entity + '-ex'

        init_repository(entity, self)
        data_path = os.path.join(entity, entity_name, 'data')
        ensure_path_exists(data_path)
        first_file = os.path.join(data_path, 'file1')
        second_file = os.path.join(data_path, 'file2')

        # One add + commit round per file.
        for file_name, content in (('file1', '0'), ('file2', '1')):
            create_file(data_path, file_name, content, '')
            self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ADD % (entity, entity_name, '--bumpversion')))
            self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_COMMIT % (entity, entity_name, '')))

        self.assertTrue(os.path.exists(first_file))
        self.assertTrue(os.path.exists(second_file))

        expected_reset_info = output_messages['INFO_INITIALIZING_RESET'] % ('--hard', 'HEAD~1')
        reset_output = check_output(MLGIT_RESET % (entity, entity_name) + ' --hard --reference=head~1')
        self.assertIn(expected_reset_info, reset_output)

        self.assertTrue(os.path.exists(first_file))
        self.assertFalse(os.path.exists(second_file))

        status_output = check_output(MLGIT_STATUS % (entity, entity_name))
        expected_status = (
            DATASET_UNPUBLISHED_COMMITS_INFO_REGEX.format(unpublished_commits=1, pluralize_char='') +
            DATASET_PUSH_INFO_REGEX +
            r'Changes to be committed:\s+'
            r'New file: datasets-ex.spec'
        )
        self.assertRegex(status_output, expected_status)
        self._check_dir(self.dataset_tag)
Exemple #12
0
 def move_metadata_dir(self, old_directory, new_directory):
     """Move ``old_directory`` with ``git mv`` into the parent of ``new_directory``.

     Both arguments are taken relative to the repository path held by this
     object. If the former parent directory is empty afterwards it is passed
     to ``clear``.
     """
     git_repo = Repo(self.__path)
     source = os.path.join(self.__path, old_directory)
     source_parent = os.path.dirname(source)
     target_parent = os.path.join(self.__path, os.path.dirname(new_directory))

     ensure_path_exists(target_parent)
     git_repo.git.mv([source, target_parent])

     leftovers = os.listdir(source_parent)
     if not leftovers:
         clear(source_parent)
Exemple #13
0
    def get(self, objectkey, path, file):
        """Fetch ``objectkey`` into ``<path>/<file>`` through ``self._hfs``.

        The directory that will hold the file is ensured first. Returns
        whatever ``self._hfs.get`` returns.
        """
        log.info('Getting file [%s] from local index' % file, class_name=MULTI_HASH_CLASS_NAME)
        target_dir = os.path.join(path, os.path.dirname(file))
        ensure_path_exists(target_dir)

        return self._hfs.get(objectkey, os.path.join(path, file))
Exemple #14
0
 def put(self, srcfile):
     """Hard-link ``srcfile`` into its hash path and record it in the storage log.

     Returns the base name of ``srcfile``.
     """
     name = os.path.basename(srcfile)
     dstfile = self._get_hashpath(name)
     ensure_path_exists(os.path.dirname(dstfile))
     os.link(srcfile, dstfile)

     # Append the new entry to the log file kept under self._logpath.
     storage_log_path = os.path.join(self._logpath, STORAGE_LOG)
     with open(storage_log_path, 'a') as log_file:
         self._log(dstfile, log_file=log_file)
     return name
Exemple #15
0
    def get(self, objectkey, path, file):
        """Fetch ``objectkey`` into ``<path>/<file>`` through ``self._hfs``.

        The directory that will hold the file is ensured first. Returns
        whatever ``self._hfs.get`` returns.
        """
        log.info(output_messages['INFO_GETTING_FILE'] % file,
                 class_name=MULTI_HASH_CLASS_NAME)
        target_dir = os.path.join(path, os.path.dirname(file))
        ensure_path_exists(target_dir)

        return self._hfs.get(objectkey, os.path.join(path, file))
Exemple #16
0
    def ilink(self, key, dstfile):
        """Hard-link the stored object for ``key`` to ``dstfile``.

        An existing ``dstfile`` is passed to ``set_write_read`` and removed
        before the link is created.
        """
        source = self._get_hashpath(key)
        ensure_path_exists(os.path.dirname(dstfile))

        log.debug(output_messages['DEBUG_LINK_FROM_TO'] % (source, dstfile), class_name=HASH_FS_CLASS_NAME)
        already_there = os.path.exists(dstfile)
        if already_there:
            # os.link refuses to overwrite, so drop the old file first.
            set_write_read(dstfile)
            os.unlink(dstfile)

        os.link(source, dstfile)
Exemple #17
0
    def ilink(self, key, dstfile):
        """Hard-link the stored object for ``key`` to ``dstfile``.

        An existing ``dstfile`` is passed to ``set_write_read`` and removed
        before the link is created.
        """
        source = self._get_hashpath(key)
        ensure_path_exists(os.path.dirname(dstfile))

        message = 'Link from [%s] to [%s]' % (source, dstfile)
        log.debug(message, class_name=HASH_FS_CLASS_NAME)
        already_there = os.path.exists(dstfile)
        if already_there:
            # os.link refuses to overwrite, so drop the old file first.
            set_write_read(dstfile)
            os.unlink(dstfile)

        os.link(source, dstfile)
Exemple #18
0
    def test_get_update_links_wspace_with_duplicates(self):
        """Link one stored object to two workspace names, then drop stale files.

        Part one links the same key to ``DATA_IMG_1`` and ``DATA_IMG_2`` and
        checks content, link count and the resulting manifest mapping. Part
        two repeats the linking with an extra ``to_be_removed`` file in the
        workspace and checks that ``_remove_unused_links_wspace`` deletes it.
        """
        expected_hash = 'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh'
        wspath = os.path.join(self.tmp_dir, 'wspace')
        hfspath = os.path.join(self.tmp_dir, 'objectsfs')
        cachepath = os.path.join(self.tmp_dir, 'cachefs')
        testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')

        # --- Part one: duplicated content linked under two names ----------
        object_store = MultihashFS(hfspath)
        key = object_store.put(HDATA_IMG_1)
        fidx = FullIndex(self.tmp_dir, self.tmp_dir)
        cache = Cache(cachepath, '', '')

        repo_config = get_sample_config_spec(testbucketname, testprofile, testregion)
        r = LocalRepository(repo_config, hfspath)
        r._update_cache(cache, key)

        mfiles = {}
        files = {DATA_IMG_1, DATA_IMG_2}
        r._update_links_wspace(cache, fidx, files, key, wspath, mfiles, Status.u.name, 'strict')

        for image_name in (DATA_IMG_1, DATA_IMG_2):
            wspace_file = os.path.join(wspath, image_name)
            self.assertTrue(os.path.exists(wspace_file))
            self.assertEqual(self.md5sum(HDATA_IMG_1), self.md5sum(wspace_file))

        # wspace_file still points at the DATA_IMG_2 link here.
        st = os.stat(wspace_file)
        self.assertTrue(st.st_nlink == 3)
        self.assertEqual(mfiles, {DATA_IMG_1: expected_hash,
                                  DATA_IMG_2: expected_hash})

        # --- Part two: a stray workspace file must be removed --------------
        ensure_path_exists(wspath)
        to_be_removed = os.path.join(wspath, 'to_be_removed')
        with open(to_be_removed, 'w') as stray:
            stray.write('DEAD\n')

        object_store = MultihashFS(hfspath)
        key = object_store.put(HDATA_IMG_1)
        fidx = FullIndex(self.tmp_dir, self.tmp_dir)
        cache = Cache(cachepath, '', '')
        repo_config = yaml_load('hdata/config.yaml')
        r = LocalRepository(repo_config, hfspath)
        r._update_cache(cache, key)

        mfiles = {}
        files = {DATA_IMG_1, DATA_IMG_2}
        r._update_links_wspace(cache, fidx, files, key, wspath, mfiles, Status.u.name, 'strict')
        r._remove_unused_links_wspace(wspath, mfiles)
        self.assertFalse(os.path.exists(to_be_removed))
Exemple #19
0
def clone_config_repository(url, folder, track):
    """Clone the repository at ``url`` into an empty project directory.

    The target directory is ``<cwd>/<folder>`` when ``folder`` is given and
    the current working directory otherwise.

    Returns ``False`` (after logging an error) when ``get_root_path()``
    reports an existing project, when the target directory is not empty,
    when cloning raises, or when ``get_root_path()`` raises
    ``RootPathException`` inside the freshly cloned directory. Returns
    ``True`` otherwise.

    Side effect: the process working directory is changed to the project
    directory and is not restored.
    """
    try:
        if get_root_path():
            log.error('You are in initialized ml-git project.',
                      class_name=ADMIN_CLASS_NAME)
            return False
    except RootPathException:
        # No project root found: continue with the clone.
        pass

    git_dir = '.git'

    try:
        if folder is not None:
            project_dir = os.path.join(os.getcwd(), folder)
            # presumably creates the directory when missing; helper not shown here
            ensure_path_exists(project_dir)
        else:
            project_dir = os.getcwd()

        # Refuse to clone on top of existing content.
        if len(os.listdir(project_dir)) != 0:
            log.error(
                'The path [%s] is not an empty directory. Consider using --folder to create an empty folder.'
                % project_dir,
                class_name=ADMIN_CLASS_NAME)
            return False
        Repo.clone_from(url, project_dir)
    except Exception as e:
        # Default to the raw exception text, then specialise below.
        error_msg = str(e)
        # NOTE(review): exact class comparison, so subclasses of these
        # exception types do not match; e.args[2] is assumed to exist on
        # GitCommandError - confirm against GitPython.
        if (e.__class__ == GitCommandError and 'Permission denied' in str(
                e.args[2])) or e.__class__ == PermissionError:
            error_msg = 'Permission denied in folder %s' % project_dir
        else:
            # Only a directory derived from ``folder`` is passed to clear();
            # the plain working directory is left alone.
            if folder is not None:
                clear(project_dir)
            if e.__class__ == GitCommandError:
                error_msg = 'Could not read from remote repository.'
        log.error(error_msg, class_name=ADMIN_CLASS_NAME)
        return False

    try:
        # get_root_path() is called from inside the clone; its return value
        # is ignored, only RootPathException matters here.
        os.chdir(project_dir)
        get_root_path()
    except RootPathException:
        clear(project_dir)
        log.error('Wrong minimal configuration files!',
                  class_name=ADMIN_CLASS_NAME)
        # NOTE(review): git_dir is the relative '.git', resolved against the
        # working directory that was just changed to project_dir - confirm
        # this second clear() is still needed after clear(project_dir).
        clear(git_dir)
        return False

    if not track:
        # Without tracking, the clone's .git entry is handed to clear().
        clear(os.path.join(project_dir, git_dir))

    return True
Exemple #20
0
    def commit_metadata(self, index_path, tags, commit_msg, changed_files,
                        mutability, ws_path):
        """Commit the manifest and metadata of this spec and tag the commit.

        Returns ``(tag, sha)`` as strings on success. Returns ``(None, None)``
        when the metadata paths cannot be resolved, when the manifest commit
        reports ``False``, when the metadata commit raises, or when the
        generated tag already exists.
        """
        spec_name = self._spec + SPEC_EXTENSION
        spec_file = os.path.join(index_path, 'metadata', self._spec, spec_name)
        full_metadata_path, categories_sub_path, metadata = self._full_metadata_path(
            spec_file)
        log.debug('Metadata path [%s]' % full_metadata_path,
                  class_name=METADATA_CLASS_NAME)

        if full_metadata_path is None or categories_sub_path is None:
            return None, None

        ensure_path_exists(full_metadata_path)

        manifest_committed = self.__commit_manifest(
            full_metadata_path, index_path, changed_files, mutability)
        if manifest_committed is False:
            log.info('No files to commit for [%s]' % self._spec,
                     class_name=METADATA_CLASS_NAME)
            return None, None

        try:
            self.__commit_metadata(full_metadata_path, index_path, metadata,
                                   tags, ws_path)
        except Exception:
            return None, None

        # Derive the tag for this commit and make sure it is not taken yet.
        tag = self.metadata_tag(metadata)
        existing_tags = self._tag_exists(tag)
        if len(existing_tags) > 0:
            log.error(
                'Tag [%s] already exists in the ml-git repository. '
                'Consider using --bumpversion parameter to increment the version number for your dataset.'
                % tag,
                class_name=METADATA_CLASS_NAME)
            for existing in existing_tags:
                log.error('\t%s' % existing)
            return None, None

        # Prefer the caller's message; otherwise generate one from the metadata.
        has_custom_message = commit_msg is not None and len(commit_msg) > 0
        msg = commit_msg if has_custom_message else self.metadata_message(metadata)
        log.debug('Commit message [%s]' % msg, class_name=METADATA_CLASS_NAME)

        sha = self.commit(categories_sub_path, msg)
        self.tag_add(tag)
        return str(tag), str(sha)
Exemple #21
0
    def _store_chunk(self, filename, data):
        """Write ``data`` to the hash path of ``filename`` unless it already exists.

        Args:
            filename: Chunk name, resolved to a path via ``self._get_hashpath``.
            data: Bytes to write, or ``None`` to write nothing.

        Returns:
            ``True`` when the chunk was written, ``False`` when a file already
            exists at the target path, and ``None`` when ``data`` is ``None``
            and no file exists yet.
        """
        fullpath = self._get_hashpath(filename)
        ensure_path_exists(os.path.dirname(fullpath))

        # ``data`` may be None (see the guard below). The original called
        # len(data) unconditionally in the "already exists" branch, which
        # raised TypeError instead of returning False.
        size = len(data) if data is not None else 0

        if os.path.isfile(fullpath):
            log.debug(output_messages['DEBUG_CHUNK_ALREADY_EXISTS'] % (filename, size), class_name=HASH_FS_CLASS_NAME)
            return False

        if data is None:
            # Nothing to write; kept as None for backward compatibility.
            return None

        log.debug(output_messages['DEBUG_ADDING_CHUNK'] % (filename, size), class_name=HASH_FS_CLASS_NAME)
        with open(fullpath, 'wb') as f:
            f.write(data)
        return True
Exemple #22
0
 def test_remove_from_workspace(self):
     """``remove_from_workspace`` must delete the image at both directory levels."""
     img = 'image.jpg'
     data_path = os.path.join(self.tmp_dir, 'data')
     ensure_path_exists(data_path)

     # Same file name once at the top level and once inside ``data``.
     targets = (os.path.join(self.tmp_dir, img), os.path.join(data_path, img))
     for target in targets:
         with open(target, 'w'):
             pass
     for target in targets:
         self.assertTrue(os.path.exists(target))

     remove_from_workspace({img}, self.tmp_dir, 'dataex')

     for target in targets:
         self.assertFalse(os.path.exists(target))
Exemple #23
0
 def test_09_push_entity_with_dir(self):
     """Push an entity after moving its workspace under ``folderA/folderB``."""
     init_repository(DATASETS, self)

     entity_root = os.path.join(self.tmp_dir, DATASETS)
     workspace = os.path.join(entity_root, DATASET_NAME)
     self.assertTrue(os.path.exists(workspace))

     entity_dir = os.path.join('folderA', 'folderB')
     workspace_with_dir = os.path.join(entity_root, entity_dir)
     ensure_path_exists(workspace_with_dir)
     shutil.move(workspace, workspace_with_dir)

     add_file(self, DATASETS, '--bumpversion', 'new', entity_dir=entity_dir)
     commit_output = check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, ''))
     self.assertNotIn(ERROR_MESSAGE, commit_output)
     push_output = check_output(MLGIT_PUSH % (DATASETS, DATASET_NAME))
     self.assertNotIn(ERROR_MESSAGE, push_output)

     # Both the workspace and the metadata tree must contain the directory.
     metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata', entity_dir)
     self.assertTrue(os.path.exists(workspace_with_dir))
     self.assertTrue(os.path.exists(metadata_dir))
Exemple #24
0
    def add_metadata(self, basepath, filepath):
        """Copy ``<basepath>/<filepath>`` into this spec's metadata directory.

        An existing destination file is unlinked first, so the copy always
        replaces it.
        """
        log.debug('Add file [%s] to ml-git index' % filepath, class_name=MULTI_HASH_CLASS_NAME)
        source = os.path.join(basepath, filepath)

        metadata_dir = os.path.join(self._path, 'metadata', self._spec)
        ensure_path_exists(metadata_dir)

        destination = os.path.join(metadata_dir, filepath)
        if os.path.exists(destination):
            os.unlink(destination)
        shutil.copy2(source, destination)
Exemple #25
0
    def commit_metadata(self, index_path, tags, commit_msg, changed_files,
                        mutability, ws_path):
        """Commit the manifest and metadata of this spec and tag the commit.

        Returns ``(tag, sha)`` as strings on success. Returns ``(None, None)``
        when the metadata paths cannot be resolved, when the manifest commit
        reports ``False``, when the metadata commit raises, or when the
        generated tag already exists.
        """
        spec_name = self._spec + SPEC_EXTENSION
        spec_file = os.path.join(index_path, 'metadata', self._spec, spec_name)
        full_metadata_path, entity_sub_path, metadata = self._full_metadata_path(
            spec_file)
        log.debug(output_messages['DEBUG_METADATA_PATH'] % full_metadata_path,
                  class_name=METADATA_CLASS_NAME)

        if full_metadata_path is None or entity_sub_path is None:
            return None, None

        ensure_path_exists(full_metadata_path)

        manifest_committed = self.__commit_manifest(
            full_metadata_path, index_path, changed_files, mutability)
        if manifest_committed is False:
            log.info(output_messages['INFO_NO_FILES_COMMIT_FOR'] % self._spec,
                     class_name=METADATA_CLASS_NAME)
            return None, None

        try:
            self.__commit_metadata(full_metadata_path, index_path, metadata,
                                   tags, ws_path)
        except Exception:
            return None, None

        # Derive the tag for this commit and make sure it is not taken yet.
        tag = self.metadata_tag(metadata)
        existing_tags = self._tag_exists(tag)
        if len(existing_tags) > 0:
            log.error(output_messages[
                'ERROR_TAG_ALREADY_EXISTS_CONSIDER_USER_VERSION'] % tag,
                      class_name=METADATA_CLASS_NAME)
            for existing in existing_tags:
                log.error(output_messages['ERROR_METADATA_MESSAGE'] % existing)
            return None, None

        # Prefer the caller's message; otherwise generate one from the metadata.
        has_custom_message = commit_msg is not None and len(commit_msg) > 0
        msg = commit_msg if has_custom_message else self.metadata_message(metadata)
        log.debug(output_messages['DEBUG_COMMIT_MESSAGE'] % msg,
                  class_name=METADATA_CLASS_NAME)

        sha = self.commit(entity_sub_path, msg)
        self.tag_add(tag)
        return str(tag), str(sha)
Exemple #26
0
    def add_metadata(self, basepath, filepath):
        """Copy ``<basepath>/<filepath>`` into this spec's metadata directory.

        An existing destination file is unlinked first, so the copy always
        replaces it.
        """
        log.debug(output_messages['DEBUG_ADD_FILE'] % filepath,
                  class_name=MULTI_HASH_CLASS_NAME)
        source = os.path.join(basepath, filepath)

        metadata_dir = os.path.join(self._path, 'metadata', self._spec)
        ensure_path_exists(metadata_dir)

        destination = os.path.join(metadata_dir, filepath)
        if os.path.exists(destination):
            os.unlink(destination)
        shutil.copy2(source, destination)
Exemple #27
0
    def _store_chunk(self, filename, data):
        """Write ``data`` to the hash path of ``filename`` unless it already exists.

        Args:
            filename: Chunk name, resolved to a path via ``self._get_hashpath``.
            data: Bytes to write, or ``None`` to write nothing.

        Returns:
            ``True`` when the chunk was written, ``False`` when a file already
            exists at the target path, and ``None`` when ``data`` is ``None``
            and no file exists yet.
        """
        fullpath = self._get_hashpath(filename)
        ensure_path_exists(os.path.dirname(fullpath))

        # ``data`` may be None (see the guard below). The original called
        # len(data) unconditionally in the "already exists" branch, which
        # raised TypeError instead of returning False.
        size = len(data) if data is not None else 0

        if os.path.isfile(fullpath):
            log.debug('Chunk [%s]-[%d] already exists' % (filename, size),
                      class_name=HASH_FS_CLASS_NAME)
            return False

        if data is None:
            # Nothing to write; kept as None for backward compatibility.
            return None

        log.debug('Add chunk [%s]-[%d]' % (filename, size),
                  class_name=HASH_FS_CLASS_NAME)
        with open(fullpath, 'wb') as f:
            f.write(data)
        return True
Exemple #28
0
    def test_get_spec_content_from_ref(self):
        """Spec content read back from a tagged commit must equal the file on disk."""
        specpath = 'dataset-ex'
        entity_dir = os.path.join(self.test_dir, 'mdata', DATASETS, 'metadata', specpath)

        m = Metadata(specpath, self.test_dir, config, DATASETS)
        m.init()
        ensure_path_exists(entity_dir)
        spec_metadata_path = entity_dir + '/dataset-ex.spec'
        shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

        # Commit the spec, tag the commit and read the spec back from it.
        sha = m.commit(spec_metadata_path, specpath)
        tag = m.tag_add(sha)
        raw_content = m._get_spec_content_from_ref(tag.commit, 'dataset-ex/dataset-ex.spec')
        content = yaml_load_str(raw_content)

        spec_file = yaml_load(spec_metadata_path)
        self.assertEqual(content, spec_file)
Exemple #29
0
 def test_11_add_with_bumpversion_in_older_tag(self):
     """Adding with ``--bumpversion`` from version 1 must yield spec version 3.

     Two tags are pushed first, then version 1 is checked out before the
     new file is added.
     """
     repo_type = DATASETS
     entity_name = '{}-ex'.format(repo_type)
     init_repository(repo_type, self)

     entity_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
     ensure_path_exists(entity_path)
     for tag_name in ('first_tag', 'second_tag'):
         self._push_tag_to_repositroy(repo_type, entity_path, tag_name)

     checkout_command = MLGIT_CHECKOUT % (repo_type, entity_name + ' --version=1')
     self.assertNotIn(ERROR_MESSAGE, check_output(checkout_command))
     self._check_spec_version(repo_type, 1)

     add_file(self, repo_type, '--bumpversion', 'third_tag')
     self._check_spec_version(repo_type, 3)
Exemple #30
0
    def link(self, key, srcfile, force=True):
        """Hard-link ``srcfile`` and the stored object for ``key`` together.

        When no object exists yet for ``key``, ``srcfile`` is linked into the
        hash path. When one exists and ``force`` is ``True``, ``srcfile`` is
        replaced by a link to the stored object; a ``FileNotFoundError``
        raised while doing so is logged and re-raised. Otherwise nothing is
        changed.
        """
        dstkey = self._get_hashpath(key)
        ensure_path_exists(os.path.dirname(dstkey))
        log.debug(output_messages['DEBUG_LINK_FROM_TO'] % (srcfile, key), class_name=HASH_FS_CLASS_NAME)

        if not os.path.exists(dstkey):
            # First time this key is seen: the source becomes the stored object.
            os.link(srcfile, dstkey)
            return

        if force is not True:
            return

        try:
            set_write_read(srcfile)
            os.unlink(srcfile)
            os.link(dstkey, srcfile)
        except FileNotFoundError as e:
            log.debug(str(e), class_name=HASH_FS_CLASS_NAME)
            raise e