Example #1
0
    def remote_fsck(self, spec, retries=2, thorough=False, paranoid=False):
        repo_type = self.__repo_type
        try:
            metadata_path = get_metadata_path(self.__config, repo_type)
            objects_path = get_objects_path(self.__config, repo_type)
            refs_path = get_refs_path(self.__config, repo_type)
            ref = Refs(refs_path, spec, repo_type)
            tag, sha = ref.branch()

            categories_path = get_path_with_categories(tag)

            self._checkout_ref(tag)
            spec_path, spec_file = search_spec_file(self.__repo_type, spec,
                                                    categories_path)

        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return
        if spec_path is None:
            return

        full_spec_path = os.path.join(spec_path, spec_file)

        r = LocalRepository(self.__config, objects_path, repo_type)

        r.remote_fsck(metadata_path, tag, full_spec_path, retries, thorough,
                      paranoid)

        # ensure first we're on master !
        self._checkout_ref()
Example #2
0
    def test_remote_fsck(self):
        testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
        hfspath = os.path.join(self.tmp_dir, 'objectsfs')
        ohfs = MultihashFS(hfspath)
        ohfs.put(HDATA_IMG_1)

        s3 = boto3.resource(
            's3',
            region_name='us-east-1',
            aws_access_key_id='fake_access_key',
            aws_secret_access_key='fake_secret_key',
        )

        s3.Object(testbucketname, 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A').delete()
        self.assertRaises(botocore.exceptions.ClientError, lambda: self.check_delete(s3, testbucketname))
        mdpath = os.path.join(self.tmp_dir, 'metadata-test')

        dataset_spec = get_sample_spec(testbucketname)
        specpath = os.path.join(mdpath, 'vision-computing', 'images', 'dataset-ex')
        ensure_path_exists(specpath)

        yaml_save(dataset_spec, os.path.join(specpath, 'dataset-ex.spec'))
        manifestpath = os.path.join(specpath, 'MANIFEST.yaml')

        yaml_save({'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh': {'imghires.jpg'}}, manifestpath)
        fullspecpath = os.path.join(specpath, os.path.join(specpath, 'dataset-ex.spec'))
        spec = 'vision-computing__images__dataset-ex__5'
        c = yaml_load('hdata/config.yaml')
        r = LocalRepository(c, hfspath)
        ret = r.remote_fsck(mdpath, spec, fullspecpath, 2, True, True)
        self.assertTrue(ret)

        self.assertEqual(None, s3.Object(testbucketname, 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A').load())