def remote_fsck(self, spec, retries=2, thorough=False, paranoid=False): repo_type = self.__repo_type try: metadata_path = get_metadata_path(self.__config, repo_type) objects_path = get_objects_path(self.__config, repo_type) refs_path = get_refs_path(self.__config, repo_type) ref = Refs(refs_path, spec, repo_type) tag, sha = ref.branch() categories_path = get_path_with_categories(tag) self._checkout_ref(tag) spec_path, spec_file = search_spec_file(self.__repo_type, spec, categories_path) except Exception as e: log.error(e, class_name=REPOSITORY_CLASS_NAME) return if spec_path is None: return full_spec_path = os.path.join(spec_path, spec_file) r = LocalRepository(self.__config, objects_path, repo_type) r.remote_fsck(metadata_path, tag, full_spec_path, retries, thorough, paranoid) # ensure first we're on master ! self._checkout_ref()
def test_remote_fsck(self): testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets') hfspath = os.path.join(self.tmp_dir, 'objectsfs') ohfs = MultihashFS(hfspath) ohfs.put(HDATA_IMG_1) s3 = boto3.resource( 's3', region_name='us-east-1', aws_access_key_id='fake_access_key', aws_secret_access_key='fake_secret_key', ) s3.Object(testbucketname, 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A').delete() self.assertRaises(botocore.exceptions.ClientError, lambda: self.check_delete(s3, testbucketname)) mdpath = os.path.join(self.tmp_dir, 'metadata-test') dataset_spec = get_sample_spec(testbucketname) specpath = os.path.join(mdpath, 'vision-computing', 'images', 'dataset-ex') ensure_path_exists(specpath) yaml_save(dataset_spec, os.path.join(specpath, 'dataset-ex.spec')) manifestpath = os.path.join(specpath, 'MANIFEST.yaml') yaml_save({'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh': {'imghires.jpg'}}, manifestpath) fullspecpath = os.path.join(specpath, os.path.join(specpath, 'dataset-ex.spec')) spec = 'vision-computing__images__dataset-ex__5' c = yaml_load('hdata/config.yaml') r = LocalRepository(c, hfspath) ret = r.remote_fsck(mdpath, spec, fullspecpath, 2, True, True) self.assertTrue(ret) self.assertEqual(None, s3.Object(testbucketname, 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A').load())