Exemple #1
0
    def test_increment_version_in_dataset_spec(self):
        dataset = 'test_dataset'
        dir1 = get_spec_file_dir(dataset)
        dir2 = os.path.join('.ml-git', DATASETS, 'index', 'metadata',
                            dataset)  # Linked path to the original
        os.makedirs(os.path.join(self.tmp_dir, dir1))
        os.makedirs(os.path.join(self.tmp_dir, dir2))
        file1 = os.path.join(self.tmp_dir, dir1, '%s.spec' % dataset)
        file2 = os.path.join(self.tmp_dir, dir2, '%s.spec' % dataset)

        self.assertFalse(increment_version_in_spec(None))

        self.assertFalse(
            increment_version_in_spec(os.path.join(get_root_path(), dataset)))

        spec = yaml_load(os.path.join(testdir, 'invalid2.spec'))
        yaml_save(spec, file1)
        self.assertFalse(
            increment_version_in_spec(os.path.join(get_root_path(), dataset)))

        spec = yaml_load(os.path.join(testdir, 'valid.spec'))
        yaml_save(spec, file1)
        os.link(file1, file2)
        self.assertTrue(
            increment_version_in_spec(
                os.path.join(get_root_path(), self.tmp_dir, DATASETS, dataset,
                             dataset + '.spec')))
Exemple #2
0
    def add(self, spec, file_path, bump_version=False, run_fsck=False):
        repo_type = self.__repo_type

        is_shared_objects = 'objects_path' in self.__config[repo_type]
        is_shared_cache = 'cache_path' in self.__config[repo_type]

        if not validate_config_spec_hash(self.__config):
            log.error(
                '.ml-git/config.yaml invalid. It should look something like this:\n%s'
                % get_yaml_str(
                    get_sample_config_spec('somebucket', 'someprofile',
                                           'someregion')),
                class_name=REPOSITORY_CLASS_NAME)
            return None

        path, file = None, None
        try:

            refs_path = get_refs_path(self.__config, repo_type)
            index_path = get_index_path(self.__config, repo_type)
            metadata_path = get_metadata_path(self.__config, repo_type)
            cache_path = get_cache_path(self.__config, repo_type)
            objects_path = get_objects_path(self.__config, repo_type)
            repo = LocalRepository(self.__config, objects_path, repo_type)
            mutability, check_mutability = repo.get_mutability_from_spec(
                spec, repo_type)
            sampling_flag = os.path.exists(
                os.path.join(index_path, 'metadata', spec, 'sampling'))
            if sampling_flag:
                log.error(
                    'You cannot add new data to an entity that is based on a checkout with the --sampling option.',
                    class_name=REPOSITORY_CLASS_NAME)
                return

            if not mutability:
                return

            if not check_mutability:
                log.error('Spec mutability cannot be changed.',
                          class_name=REPOSITORY_CLASS_NAME)
                return

            if not self._has_new_data(repo, spec):
                return None

            ref = Refs(refs_path, spec, repo_type)
            tag, sha = ref.branch()

            categories_path = get_path_with_categories(tag)

            path, file = search_spec_file(self.__repo_type, spec,
                                          categories_path)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return

        if path is None:
            return
        spec_path = os.path.join(path, file)
        if not self._is_spec_valid(spec_path):
            return None

        # Check tag before anything to avoid creating unstable state
        log.debug('Repository: check if tag already exists',
                  class_name=REPOSITORY_CLASS_NAME)

        m = Metadata(spec, metadata_path, self.__config, repo_type)

        if not m.check_exists():
            log.error('The %s has not been initialized' % self.__repo_type,
                      class_name=REPOSITORY_CLASS_NAME)
            return

        try:
            m.update()
        except Exception:
            pass

        # get version of current manifest file
        manifest = self._get_current_manifest_file(m, tag)

        try:
            # adds chunks to ml-git Index
            log.info('%s adding path [%s] to ml-git index' % (repo_type, path),
                     class_name=REPOSITORY_CLASS_NAME)
            with change_mask_for_routine(is_shared_objects):
                idx = MultihashIndex(spec, index_path, objects_path,
                                     mutability, cache_path)
                idx.add(path, manifest, file_path)

            # create hard links in ml-git Cache
            self.create_hard_links_in_cache(cache_path, index_path,
                                            is_shared_cache, mutability, path,
                                            spec)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return None

        if bump_version and not increment_version_in_spec(
                spec_path, self.__repo_type):
            return None

        idx.add_metadata(path, file)

        self._check_corrupted_files(spec, repo)

        # Run file check
        if run_fsck:
            self.fsck()