def test_put(self):
    """Add two entries through MultihashIndex and check the index reports their hashes.

    'data' is added first and its expected hash is looked up in the
    object returned by ``get_index()``. 'image.jpg' is then added and
    registered explicitly with ``update_index``; the same object
    (fetched before the second add) must report that hash as well.
    """
    data_hash = 'zdj7WgHSKJkoJST5GWGgS53ARqV7oqMGYVvWzEWku3MBfnQ9u'
    image_hash = 'zdj7WemKEtQMVL81UU6PSuYaoxvBQ6CiUMq1fMvoXBhPUsCK2'

    index = MultihashIndex('dataset-spec', self.tmp_dir, self.tmp_dir)
    index.add('data', self.tmp_dir)
    manifest = index.get_index()
    self.assertTrue(manifest.exists(data_hash))

    # The manifest handle obtained above is reused on purpose: it must
    # observe the entry registered after it was fetched.
    index.add('image.jpg', self.tmp_dir)
    index.update_index(image_hash, 'image.jpg')
    self.assertTrue(manifest.exists(image_hash))
def test_push(self):
    """Index a data directory, commit it to the object store and push it.

    The test lays out a '.ml-git' tree under ``self.tmp_dir`` (index,
    metadata and objects directories), copies the spec and config
    fixtures from 'hdata/', writes a MANIFEST.yaml, then runs
    add -> commit_index -> push and looks each indexed key up through a
    boto3 S3 resource.
    """
    # Directory layout used by this test.
    mlgit_dir = os.path.join(self.tmp_dir, '.ml-git')
    index_dir = os.path.join(mlgit_dir, 'index-test')
    metadata_dir = os.path.join(mlgit_dir, 'metadata-test')
    objects_dir = os.path.join(mlgit_dir, 'objects-test')
    spec_dir = os.path.join(metadata_dir, 'vision-computing/images/dataset-ex')
    spec_file = spec_dir + '/dataset-ex.spec'
    manifest_file = os.path.join(spec_dir, 'MANIFEST.yaml')

    for directory in (spec_dir, index_dir):
        ensure_path_exists(directory)

    # Fixtures: entity spec, repository config and an initial manifest.
    shutil.copy('hdata/dataset-ex.spec', spec_file)
    shutil.copy('hdata/config.yaml', mlgit_dir + '/config.yaml')
    initial_manifest = {
        'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh': {'imghires.jpg'},
    }
    yaml_save(initial_manifest, manifest_file)

    # adds chunks to ml-git Index
    index = MultihashIndex(spec_dir, index_dir, objects_dir)
    index.add('data-test-push/', manifest_file)

    full_index = yaml_load(os.path.join(spec_dir, 'INDEX.yaml'))
    self.assertTrue(len(full_index) > 0)
    self.assertTrue(os.path.exists(index_dir))

    # Move the indexed chunks into the local object store.
    objects = Objects(spec_dir, objects_dir)
    objects.commit_index(index_dir, self.tmp_dir)
    self.assertTrue(os.path.exists(objects_dir))

    # Push the object store using the fixture configuration.
    config = yaml_load('hdata/config.yaml')
    repository = LocalRepository(config, objects_dir)
    repository.push(objects_dir, spec_file)

    s3 = boto3.resource(
        's3',
        region_name='eu-west-1',
        aws_access_key_id='fake_access_key',
        aws_secret_access_key='fake_secret_key',
    )
    # NOTE(review): s3.Object(...) presumably only builds a lazy resource
    # handle, so this assertion may pass even when the key was never
    # uploaded - confirm and consider an explicit existence check.
    for key in index.get_index():
        self.assertIsNotNone(s3.Object(testbucketname, key))
def reset(self, spec, reset_type, head):
    """Reset the entity ``spec`` according to ``reset_type`` and ``head``.

    Args:
        spec: entity name; used to locate its manifest and spec file.
        reset_type: one of '--soft', '--mixed' or '--hard'.
        head: reference to reset to; compared against ``HEAD`` and
            ``HEAD_1``.

    Behaviour, as implemented below:
        * '--soft' / '--mixed' against ``HEAD`` returns immediately.
        * When ``head == HEAD_1`` the metadata repository is reset and
          the ml-git ref HEAD is updated to the resulting tag.
        * '--soft' re-adds the manifest difference to the index.
        * '--mixed' / '--hard' remove the difference from the index;
          '--hard' additionally removes the files from the workspace.

    Returns:
        None in every case. Failures while building the helper objects
        are logged; a failure of the metadata reset aborts silently.
    """
    log.info(output_messages['INFO_INITIALIZING_RESET'] % (reset_type, head), class_name=REPOSITORY_CLASS_NAME)

    # Nothing to do for a soft/mixed reset against the current HEAD.
    if reset_type in ('--soft', '--mixed') and head == HEAD:
        return

    try:
        repo_type = self.__repo_type
        config = self.__config
        metadata_path = get_metadata_path(config, repo_type)
        index_path = get_index_path(config, repo_type)
        refs_path = get_refs_path(config, repo_type)
        object_path = get_objects_path(config, repo_type)
        metadata = Metadata(spec, metadata_path, config, repo_type)
        refs = Refs(refs_path, spec, repo_type)
        index = MultihashIndex(spec, index_path, object_path)
        full_index = FullIndex(spec, index_path)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return

    # Tag and manifest content as they are before any reset happens.
    tag_before_reset = metadata.get_current_tag()
    categories_path = get_path_with_categories(str(tag_before_reset))
    manifest_path = os.path.join(metadata_path, categories_path, spec, MANIFEST_FILE)
    manifest_before_reset = Manifest(manifest_path).load()

    if head == HEAD_1:  # HEAD~1
        try:
            metadata.reset()
        except Exception:
            # Kept from the original: abort without logging here.
            return
        # Point the ml-git ref HEAD at the tag reached by the reset.
        tag_after_reset = metadata.get_current_tag()
        sha_after_reset = metadata.sha_from_tag(tag_after_reset)
        refs.update_head(str(tag_after_reset), sha_after_reset)

    # Workspace path of the entity; only needed by '--hard'.
    path = None
    try:
        path, _ = search_spec_file(self.__repo_type, spec, categories_path)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)

    if reset_type == '--hard' and path is None:
        return

    # Difference between the manifest now on disk and the one loaded
    # before the reset.
    hash_files, file_names = Manifest(manifest_path).get_diff(manifest_before_reset)
    index_manifest = index.get_index().load()

    if reset_type == '--soft':
        # add in index/metadata/<entity-name>/MANIFEST
        index.update_index_manifest(index_manifest)
        index.update_index_manifest(hash_files)
        full_index.update_index_status(file_names, Status.a.name)
        return

    # --hard or --mixed: drop the changed hashes and the indexed files.
    file_names.update(*index_manifest.values())
    hash_store = MultihashFS(index_path)
    for key_hash in hash_files:
        hash_store.remove_hash(key_hash)
    index.remove_manifest()
    full_index.remove_from_index_yaml(file_names)
    full_index.remove_uncommitted()

    if reset_type == '--hard':
        # reset workspace
        remove_from_workspace(file_names, path, spec)