def test_diff_refs_modified_file(self):
    """Changing one hash entry between two commits is reported as exactly one modified file."""
    repo_type = DATASETS
    metadata_path = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
    entity = 'dataset-ex'
    spec_dir = os.path.join('vision-computer', 'images', entity)
    test_config = deepcopy(config)
    test_config['mlgit_path'] = '.ml-git'
    metadata = Metadata(entity, metadata_path, test_config, repo_type)
    metadata.init()
    ensure_path_exists(os.path.join(metadata_path, spec_dir, entity))
    manifest_file = os.path.join(metadata_path, spec_dir, 'MANIFEST.yaml')
    shutil.copy('hdata/dataset-ex.spec', os.path.join(metadata_path, spec_dir, '{}.spec'.format(entity)))
    # First commit: the original manifest.
    yaml_save(files_mock, manifest_file)
    first_sha = metadata.commit(manifest_file, 'test')
    # Second commit: same file name under a different hash -> a "modified" file.
    changed_manifest = deepcopy(files_mock)
    del changed_manifest['zdj7WZzR8Tw87Dx3dm76W5aehnT23GSbXbQ9qo73JgtwREGwB']
    changed_manifest['NewHash'] = {'7.jpg'}
    yaml_save(changed_manifest, manifest_file)
    second_sha = metadata.commit(manifest_file, 'test')
    added_files, deleted_files, modified_files = metadata.diff_refs_with_modified_files(entity, first_sha, second_sha)
    self.assertTrue(len(added_files) == 0)
    self.assertTrue(len(deleted_files) == 0)
    self.assertTrue(len(modified_files) == 1)
def test_diff_refs_add_file(self):
    """Adding one hash entry between two commits is reported as exactly one added file."""
    repo_type = DATASETS
    metadata_path = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
    entity = 'dataset-ex'
    spec_dir = os.path.join('vision-computer', 'images', entity)
    test_config = deepcopy(config)
    test_config['mlgit_path'] = '.ml-git'
    metadata = Metadata(entity, metadata_path, test_config, repo_type)
    metadata.init()
    ensure_path_exists(os.path.join(metadata_path, spec_dir, entity))
    manifest_file = os.path.join(metadata_path, spec_dir, 'MANIFEST.yaml')
    shutil.copy('hdata/dataset-ex.spec', os.path.join(metadata_path, spec_dir, '{}.spec'.format(entity)))
    # First commit: the original manifest.
    yaml_save(files_mock, manifest_file)
    first_sha = metadata.commit(manifest_file, 'test')
    # Second commit: one brand-new hash entry -> a single "added" file.
    changed_manifest = deepcopy(files_mock)
    changed_manifest['zPaksM5tNewHashQ2VABPvvfC3VW6wFRTWKvFhUW5QaDx6JMoma'] = {'11.jpg'}
    yaml_save(changed_manifest, manifest_file)
    second_sha = metadata.commit(manifest_file, 'test')
    added_files, deleted_files, modified_files = metadata.diff_refs_with_modified_files(entity, first_sha, second_sha)
    self.assertTrue(len(added_files) == 1)
    self.assertTrue(len(deleted_files) == 0)
    self.assertTrue(len(modified_files) == 0)
def clone_config_repository(url, folder, track):
    """Clone an ml-git configuration repository from *url*.

    Clones into *folder* (created if needed) or the current working directory.
    Unless *track* is set, the cloned '.git' directory is removed afterwards.
    Returns True on success, False otherwise.
    """
    try:
        # Refuse to clone inside an already-initialized ml-git project.
        if get_root_path():
            log.error(output_messages['ERROR_IN_INTIALIZED_PROJECT'], class_name=ADMIN_CLASS_NAME)
            return False
    except RootPathException:
        # No project root found: safe to proceed.
        pass
    git_dir = '.git'
    try:
        if folder is not None:
            project_dir = os.path.join(os.getcwd(), folder)
            ensure_path_exists(project_dir)
        else:
            project_dir = os.getcwd()
        if os.listdir(project_dir):
            log.error(output_messages['ERROR_PATH_NOT_EMPTY'] % project_dir, class_name=ADMIN_CLASS_NAME)
            return False
        Repo.clone_from(url, project_dir)
    except Exception as e:
        log.error(handle_clone_exception(e, folder, project_dir), class_name=ADMIN_CLASS_NAME)
        return False
    if not check_successfully_clone(project_dir, git_dir):
        return False
    if not track:
        clear(os.path.join(project_dir, git_dir))
    return True
def donwload_folder(self, file_path, folder_id):
    """Download every file of the remote folder *folder_id* into local *file_path*.

    NOTE(review): the method name has a typo ('donwload'); it is kept unchanged
    for backward compatibility with existing callers.
    """
    # Create the destination directory once, instead of once per downloaded file
    # (the original called ensure_path_exists inside the loop with the same path).
    ensure_path_exists(file_path)
    for file in self.list_files_in_folder(folder_id):
        complete_file_path = os.path.join(file_path, file.get('name'))
        self.download_file(complete_file_path, file)
def test_remote_fsck(self):
    """remote_fsck must detect and re-upload a blob deleted from the S3 bucket."""
    testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
    hfspath = os.path.join(self.tmp_dir, 'objectsfs')
    ohfs = MultihashFS(hfspath)
    ohfs.put(HDATA_IMG_1)
    s3 = boto3.resource(
        's3',
        region_name='us-east-1',
        aws_access_key_id='fake_access_key',
        aws_secret_access_key='fake_secret_key',
    )
    # Remove one blob remotely and confirm it is gone.
    s3.Object(testbucketname, 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A').delete()
    self.assertRaises(botocore.exceptions.ClientError, lambda: self.check_delete(s3, testbucketname))
    mdpath = os.path.join(self.tmp_dir, 'metadata-test')
    dataset_spec = get_sample_spec(testbucketname)
    specpath = os.path.join(mdpath, 'vision-computing', 'images', 'dataset-ex')
    ensure_path_exists(specpath)
    yaml_save(dataset_spec, os.path.join(specpath, 'dataset-ex.spec'))
    manifestpath = os.path.join(specpath, 'MANIFEST.yaml')
    yaml_save({'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh': {'imghires.jpg'}}, manifestpath)
    # FIX: was os.path.join(specpath, os.path.join(specpath, 'dataset-ex.spec')).
    # That only yielded the right result because joining two absolute paths keeps
    # the second one; the single join is the intended spec location.
    fullspecpath = os.path.join(specpath, 'dataset-ex.spec')
    spec = 'vision-computing__images__dataset-ex__5'
    c = yaml_load('hdata/config.yaml')
    r = LocalRepository(c, hfspath)
    ret = r.remote_fsck(mdpath, spec, fullspecpath, 2, True, True)
    self.assertTrue(ret)
    # Object.load() raises if the key is still missing; returning None means the
    # blob exists in the bucket again.
    self.assertEqual(None, s3.Object(testbucketname, 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A').load())
def test_fetch(self):
    """Fetch a dataset's objects into a local object store and compare the set of
    downloaded files against the expected hash set.

    NOTE(review): `hs` is not defined in this method — presumably a class-level
    fixture holding the expected chunk hashes; confirm against the test setup.
    """
    mdpath = os.path.join(self.tmp_dir, 'metadata-test')
    testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
    config_spec = get_sample_config_spec(testbucketname, testprofile, testregion)
    dataset_spec = get_sample_spec(testbucketname)
    specpath = os.path.join(mdpath, 'vision-computing', 'images', 'dataset-ex')
    ensure_path_exists(specpath)
    yaml_save(dataset_spec, os.path.join(specpath, 'dataset-ex.spec'))
    manifestpath = os.path.join(specpath, 'MANIFEST.yaml')
    yaml_save(
        {
            'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh': {'imghires.jpg'}
        }, manifestpath)
    objectpath = os.path.join(self.tmp_dir, 'objects-test')
    spec = 'vision-computing__images__dataset-ex__5'
    r = LocalRepository(config_spec, objectpath)
    r.fetch(mdpath, spec, None)
    # Collect every object file that fetch materialized on disk.
    fs = set()
    for root, dirs, files in os.walk(objectpath):
        for file in files:
            fs.add(file)
    self.assertEqual(len(hs), len(fs))
    self.assertTrue(len(hs.difference(fs)) == 0)
def test_05_hard_with_data_in_subpath(self):
    """`ml-git reset --hard` to HEAD~1 on an entity with data in a subdirectory must
    drop the second commit's file while keeping the first commit's file."""
    entity = DATASETS
    subpath = 'data'
    init_repository(entity, self)
    # First commit: one file inside the 'data' subpath.
    first_commit_file_name = 'file1'
    data_path = os.path.join(entity, entity+'-ex', subpath)
    ensure_path_exists(data_path)
    create_file(data_path, first_commit_file_name, '0', '')
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ADD % (entity, entity+'-ex', '--bumpversion')))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_COMMIT % (entity, entity+'-ex', '')))
    # Second commit: a second file in the same subpath.
    second_commit_file_name = 'file2'
    create_file(data_path, second_commit_file_name, '1', '')
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ADD % (entity, entity+'-ex', '--bumpversion')))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_COMMIT % (entity, entity+'-ex', '')))
    self.assertTrue(os.path.exists(os.path.join(data_path, first_commit_file_name)))
    self.assertTrue(os.path.exists(os.path.join(data_path, second_commit_file_name)))
    # Hard reset to the previous commit (the reference flag accepts lowercase 'head~1').
    self.assertIn(output_messages['INFO_INITIALIZING_RESET'] % ('--hard', 'HEAD~1'),
                  check_output(MLGIT_RESET % (entity, entity+'-ex') + ' --hard --reference=head~1'))
    # Only the first commit's file should remain in the workspace.
    self.assertTrue(os.path.exists(os.path.join(data_path, first_commit_file_name)))
    self.assertFalse(os.path.exists(os.path.join(data_path, second_commit_file_name)))
    self.assertRegex(check_output(MLGIT_STATUS % (entity, entity+'-ex')),
                     r'Changes to be committed:\n\tNew file: datasets-ex.spec\n\nUntracked files:\n\nCorrupted files')
    self._check_dir(self.dataset_tag)
def test_10_checkout_with_unsaved_work(self):
    """Checking out another version with unsaved workspace changes must warn that
    local changes would be discarded and must name the unsaved file."""
    entity = DATASETS
    init_repository(entity, self)
    self._create_new_tag(entity, 'tag1')
    entity_dir = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    # Create, commit and push a newer version so there is a version to go back to.
    with open(os.path.join(entity_dir, 'tag2'), 'wt') as z:
        z.write('0' * 100)
    self.assertNotIn(
        ERROR_MESSAGE,
        check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, '')))
    self.assertNotIn(
        ERROR_MESSAGE,
        check_output(MLGIT_COMMIT % (entity, entity + '-ex', '--version=3')))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_PUSH % (entity, entity + '-ex')))
    # Leave an uncommitted file in the workspace before checking out.
    unsaved_file_dir = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME, 'folderA')
    ensure_path_exists(unsaved_file_dir)
    with open(os.path.join(unsaved_file_dir, 'test-unsaved-file'), 'wt') as z:
        z.write('0' * 100)
    output_command = check_output(
        MLGIT_CHECKOUT % (DATASETS, DATASET_NAME + ' --version=2'))
    self.assertIn(output_messages['ERROR_DISCARDED_LOCAL_CHANGES'], output_command)
    self.assertIn('test-unsaved-file', output_command)
def test_get_metrics(self):
    """Metrics stored in a model spec are rendered by _get_metrics as a formatted table."""
    repo_type = MODELS
    metadata_dir = os.path.join(self.test_dir, 'mdata', repo_type, 'metadata')
    category_path = os.path.join('vision-computer', 'images')
    entity = 'model-ex'
    metadata = Metadata(entity, self.test_dir, config, repo_type)
    metadata.init()
    ensure_path_exists(os.path.join(metadata_dir, category_path, entity))
    spec_path = os.path.join(metadata_dir, category_path, entity, 'model-ex.spec')
    shutil.copy('hdata/dataset-ex.spec', spec_path)
    spec_file = yaml_load(spec_path)
    # The sample spec is a dataset spec; re-key it as a model spec and attach metrics.
    spec_file[MODEL_SPEC_KEY] = deepcopy(spec_file[DATASET_SPEC_KEY])
    del spec_file[DATASET_SPEC_KEY]
    spec_file[MODEL_SPEC_KEY]['metrics'] = {'metric_1': 0, 'metric_2': 1}
    yaml_save(spec_file, spec_path)
    tag = 'vision-computer__images__model-ex__1'
    sha = metadata.commit(spec_path, category_path)
    metadata.tag_add(tag)
    metrics = metadata._get_metrics(entity, sha)
    # Build the expected table exactly as _get_metrics is expected to format it.
    expected_table = PrettyTable()
    expected_table.field_names = ['Name', 'Value']
    expected_table.align['Name'] = 'l'
    expected_table.align['Value'] = 'l'
    expected_table.add_row(['metric_1', 0])
    expected_table.add_row(['metric_2', 1])
    expected_output = '\nmetrics:\n{}'.format(expected_table.get_string())
    self.assertEqual(metrics, expected_output)
def move_entity_to_dir(tmp_dir, artifact_name, entity_type):
    """Relocate an entity workspace into a 'folderA' subdirectory.

    Returns (entity_dir, old_workspace_path, new_parent_path).
    """
    workspace = os.path.join(tmp_dir, entity_type, artifact_name)
    entity_dir = os.path.join('folderA')
    target_dir = os.path.join(tmp_dir, entity_type, entity_dir)
    ensure_path_exists(target_dir)
    shutil.move(workspace, target_dir)
    return entity_dir, workspace, target_dir
def test_05_hard_with_data_in_subpath(self):
    """`ml-git reset --hard` to HEAD~1 on an entity with data in a subdirectory must
    drop the second commit's file while keeping the first commit's file, and the
    status output must show one unpublished commit."""
    entity = DATASETS
    subpath = 'data'
    init_repository(entity, self)
    # First commit: one file inside the 'data' subpath.
    first_commit_file_name = 'file1'
    data_path = os.path.join(entity, entity+'-ex', subpath)
    ensure_path_exists(data_path)
    create_file(data_path, first_commit_file_name, '0', '')
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ADD % (entity, entity+'-ex', '--bumpversion')))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_COMMIT % (entity, entity+'-ex', '')))
    # Second commit: a second file in the same subpath.
    second_commit_file_name = 'file2'
    create_file(data_path, second_commit_file_name, '1', '')
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ADD % (entity, entity+'-ex', '--bumpversion')))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_COMMIT % (entity, entity+'-ex', '')))
    self.assertTrue(os.path.exists(os.path.join(data_path, first_commit_file_name)))
    self.assertTrue(os.path.exists(os.path.join(data_path, second_commit_file_name)))
    # Hard reset to the previous commit (the reference flag accepts lowercase 'head~1').
    self.assertIn(output_messages['INFO_INITIALIZING_RESET'] % ('--hard', 'HEAD~1'),
                  check_output(MLGIT_RESET % (entity, entity+'-ex') + ' --hard --reference=head~1'))
    # Only the first commit's file should remain in the workspace.
    self.assertTrue(os.path.exists(os.path.join(data_path, first_commit_file_name)))
    self.assertFalse(os.path.exists(os.path.join(data_path, second_commit_file_name)))
    self.assertRegex(check_output(MLGIT_STATUS % (entity, entity+'-ex')),
                     DATASET_UNPUBLISHED_COMMITS_INFO_REGEX.format(unpublished_commits=1, pluralize_char='') +
                     DATASET_PUSH_INFO_REGEX +
                     r'Changes to be committed:\s+'
                     r'New file: datasets-ex.spec')
    self._check_dir(self.dataset_tag)
def move_metadata_dir(self, old_directory, new_directory):
    """Move a metadata directory inside the repo via 'git mv', pruning the old
    parent directory if the move leaves it empty."""
    repo = Repo(self.__path)
    source = os.path.join(self.__path, old_directory)
    destination_parent = os.path.join(self.__path, os.path.dirname(new_directory))
    ensure_path_exists(destination_parent)
    repo.git.mv([source, destination_parent])
    old_parent = os.path.dirname(source)
    if not os.listdir(old_parent):
        clear(old_parent)
def get(self, objectkey, path, file):
    """Restore *file* (stored under *objectkey*) into *path*, creating parent dirs."""
    log.info('Getting file [%s] from local index' % file, class_name=MULTI_HASH_CLASS_NAME)
    target_dir = os.path.join(path, os.path.dirname(file))
    ensure_path_exists(target_dir)
    destination = os.path.join(path, file)
    return self._hfs.get(objectkey, destination)
def put(self, srcfile):
    """Hard-link *srcfile* into the hash store and append it to the storage log.

    Returns the file's basename.
    """
    destination = self._get_hashpath(os.path.basename(srcfile))
    ensure_path_exists(os.path.dirname(destination))
    os.link(srcfile, destination)
    log_path = os.path.join(self._logpath, STORAGE_LOG)
    with open(log_path, 'a') as log_file:
        self._log(destination, log_file=log_file)
    return os.path.basename(srcfile)
def get(self, objectkey, path, file):
    """Restore *file* (stored under *objectkey*) into *path*, creating parent dirs."""
    log.info(output_messages['INFO_GETTING_FILE'] % file, class_name=MULTI_HASH_CLASS_NAME)
    target_dir = os.path.join(path, os.path.dirname(file))
    ensure_path_exists(target_dir)
    destination = os.path.join(path, file)
    return self._hfs.get(objectkey, destination)
def ilink(self, key, dstfile):
    """Replace *dstfile* with a hard link to the stored object *key*."""
    source = self._get_hashpath(key)
    ensure_path_exists(os.path.dirname(dstfile))
    log.debug(output_messages['DEBUG_LINK_FROM_TO'] % (source, dstfile), class_name=HASH_FS_CLASS_NAME)
    if os.path.exists(dstfile):
        # An existing destination may be read-only; make it writable before unlinking.
        set_write_read(dstfile)
        os.unlink(dstfile)
    os.link(source, dstfile)
def ilink(self, key, dstfile):
    """Replace *dstfile* with a hard link to the stored object *key*."""
    source = self._get_hashpath(key)
    ensure_path_exists(os.path.dirname(dstfile))
    log.debug('Link from [%s] to [%s]' % (source, dstfile), class_name=HASH_FS_CLASS_NAME)
    if os.path.exists(dstfile):
        # An existing destination may be read-only; make it writable before unlinking.
        set_write_read(dstfile)
        os.unlink(dstfile)
    os.link(source, dstfile)
def test_get_update_links_wspace_with_duplicates(self):
    """_update_links_wspace must hard-link duplicate entries of the same object into
    the workspace; _remove_unused_links_wspace must delete files absent from mfiles."""
    # Phase 1: two workspace files backed by the same object key.
    wspath = os.path.join(self.tmp_dir, 'wspace')
    hfspath = os.path.join(self.tmp_dir, 'objectsfs')
    ohfs = MultihashFS(hfspath)
    key = ohfs.put(HDATA_IMG_1)
    fidx = FullIndex(self.tmp_dir, self.tmp_dir)
    cachepath = os.path.join(self.tmp_dir, 'cachefs')
    cache = Cache(cachepath, '', '')
    testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
    c = get_sample_config_spec(testbucketname, testprofile, testregion)
    r = LocalRepository(c, hfspath)
    r._update_cache(cache, key)
    mfiles = {}
    files = {DATA_IMG_1, DATA_IMG_2}
    r._update_links_wspace(cache, fidx, files, key, wspath, mfiles, Status.u.name, 'strict')
    # Both workspace files must exist and have the same content as the source image.
    wspace_file = os.path.join(wspath, DATA_IMG_1)
    self.assertTrue(os.path.exists(wspace_file))
    self.assertEqual(self.md5sum(HDATA_IMG_1), self.md5sum(wspace_file))
    wspace_file = os.path.join(wspath, DATA_IMG_2)
    self.assertTrue(os.path.exists(wspace_file))
    self.assertEqual(self.md5sum(HDATA_IMG_1), self.md5sum(wspace_file))
    st = os.stat(wspace_file)
    # Three hard links total — the two workspace files plus (presumably) the cache
    # copy; confirm against _update_cache.
    self.assertTrue(st.st_nlink == 3)
    self.assertEqual(mfiles, {DATA_IMG_1: 'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh',
                              DATA_IMG_2: 'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh'})
    # Phase 2: a stray file in the workspace must be removed as an unused link.
    wspath = os.path.join(self.tmp_dir, 'wspace')
    ensure_path_exists(wspath)
    to_be_removed = os.path.join(wspath, 'to_be_removed')
    with open(to_be_removed, 'w') as f:
        f.write('DEAD\n')
    hfspath = os.path.join(self.tmp_dir, 'objectsfs')
    ohfs = MultihashFS(hfspath)
    key = ohfs.put(HDATA_IMG_1)
    fidx = FullIndex(self.tmp_dir, self.tmp_dir)
    cachepath = os.path.join(self.tmp_dir, 'cachefs')
    cache = Cache(cachepath, '', '')
    c = yaml_load('hdata/config.yaml')
    r = LocalRepository(c, hfspath)
    r._update_cache(cache, key)
    mfiles = {}
    files = {DATA_IMG_1, DATA_IMG_2}
    r._update_links_wspace(cache, fidx, files, key, wspath, mfiles, Status.u.name, 'strict')
    r._remove_unused_links_wspace(wspath, mfiles)
    self.assertFalse(os.path.exists(to_be_removed))
def clone_config_repository(url, folder, track):
    """Clone an ml-git configuration repository from *url*.

    Clones into *folder* (created if needed) or the current working directory,
    validates the cloned project has a minimal configuration, and removes the
    '.git' directory unless *track* is set. Returns True on success, False otherwise.
    """
    try:
        # Refuse to clone inside an already-initialized ml-git project.
        if get_root_path():
            log.error('You are in initialized ml-git project.', class_name=ADMIN_CLASS_NAME)
            return False
    except RootPathException:
        # No project root found: safe to proceed.
        pass
    git_dir = '.git'
    try:
        if folder is not None:
            project_dir = os.path.join(os.getcwd(), folder)
            ensure_path_exists(project_dir)
        else:
            project_dir = os.getcwd()
        if len(os.listdir(project_dir)) != 0:
            log.error(
                'The path [%s] is not an empty directory. Consider using --folder to create an empty folder.' % project_dir,
                class_name=ADMIN_CLASS_NAME)
            return False
        Repo.clone_from(url, project_dir)
    except Exception as e:
        error_msg = str(e)
        # Distinguish permission problems from other git failures.
        if (e.__class__ == GitCommandError and 'Permission denied' in str(
                e.args[2])) or e.__class__ == PermissionError:
            error_msg = 'Permission denied in folder %s' % project_dir
        else:
            # Remove the directory we created before reporting the failure.
            if folder is not None:
                clear(project_dir)
            if e.__class__ == GitCommandError:
                error_msg = 'Could not read from remote repository.'
        log.error(error_msg, class_name=ADMIN_CLASS_NAME)
        return False
    try:
        # Verify the clone produced a minimally valid ml-git configuration.
        os.chdir(project_dir)
        get_root_path()
    except RootPathException:
        clear(project_dir)
        log.error('Wrong minimal configuration files!', class_name=ADMIN_CLASS_NAME)
        clear(git_dir)
        return False
    if not track:
        # Drop git history unless the caller asked to keep tracking it.
        clear(os.path.join(project_dir, git_dir))
    return True
def commit_metadata(self, index_path, tags, commit_msg, changed_files, mutability, ws_path):
    """Commit the staged metadata for this spec and tag the resulting commit.

    Returns (tag, sha) as strings, or (None, None) on any failure: unresolved
    metadata path, nothing to commit, commit error, or an already-existing tag.
    """
    spec_file = os.path.join(index_path, 'metadata', self._spec, self._spec + SPEC_EXTENSION)
    full_metadata_path, categories_sub_path, metadata = self._full_metadata_path(
        spec_file)
    log.debug('Metadata path [%s]' % full_metadata_path, class_name=METADATA_CLASS_NAME)
    if full_metadata_path is None:
        return None, None
    elif categories_sub_path is None:
        return None, None
    ensure_path_exists(full_metadata_path)
    ret = self.__commit_manifest(full_metadata_path, index_path, changed_files, mutability)
    if ret is False:
        log.info('No files to commit for [%s]' % self._spec, class_name=METADATA_CLASS_NAME)
        return None, None
    try:
        self.__commit_metadata(full_metadata_path, index_path, metadata, tags, ws_path)
    except Exception:
        return None, None
    # generates a tag to associate to the commit
    tag = self.metadata_tag(metadata)
    # check if tag already exists in the ml-git repository
    tags = self._tag_exists(tag)
    if len(tags) > 0:
        log.error(
            'Tag [%s] already exists in the ml-git repository. '
            'Consider using --bumpversion parameter to increment the version number for your dataset.' % tag,
            class_name=METADATA_CLASS_NAME)
        for t in tags:
            log.error('\t%s' % t)
        return None, None
    if commit_msg is not None and len(commit_msg) > 0:
        msg = commit_msg
    else:
        # generates a commit message
        msg = self.metadata_message(metadata)
    log.debug('Commit message [%s]' % msg, class_name=METADATA_CLASS_NAME)
    sha = self.commit(categories_sub_path, msg)
    self.tag_add(tag)
    return str(tag), str(sha)
def _store_chunk(self, filename, data):
    """Write one chunk under its hash path.

    Returns False if the chunk already exists, True after writing, and None
    (implicitly) when *data* is None.
    """
    chunk_path = self._get_hashpath(filename)
    ensure_path_exists(os.path.dirname(chunk_path))
    if os.path.isfile(chunk_path):
        log.debug(output_messages['DEBUG_CHUNK_ALREADY_EXISTS'] % (filename, len(data)),
                  class_name=HASH_FS_CLASS_NAME)
        return False
    if data is not None:
        log.debug(output_messages['DEBUG_ADDING_CHUNK'] % (filename, len(data)),
                  class_name=HASH_FS_CLASS_NAME)
        with open(chunk_path, 'wb') as chunk_file:
            chunk_file.write(data)
        return True
def test_remove_from_workspace(self):
    """remove_from_workspace deletes matching files at the root and in subdirectories."""
    img = 'image.jpg'
    data_path = os.path.join(self.tmp_dir, 'data')
    ensure_path_exists(data_path)
    root_file = os.path.join(self.tmp_dir, img)
    nested_file = os.path.join(data_path, img)
    # Create an empty file with the same name at both levels.
    for path in (root_file, nested_file):
        open(path, 'w').close()
        self.assertTrue(os.path.exists(path))
    remove_from_workspace({img}, self.tmp_dir, 'dataex')
    self.assertFalse(os.path.exists(root_file))
    self.assertFalse(os.path.exists(nested_file))
def test_09_push_entity_with_dir(self):
    """Pushing an entity whose workspace was moved into nested directories must
    succeed and mirror the directory layout under the metadata path."""
    init_repository(DATASETS, self)
    workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    self.assertTrue(os.path.exists(workspace))
    # Relocate the workspace two directory levels deep.
    entity_dir = os.path.join('folderA', 'folderB')
    workspace_with_dir = os.path.join(self.tmp_dir, DATASETS, entity_dir)
    ensure_path_exists(workspace_with_dir)
    shutil.move(workspace, workspace_with_dir)
    add_file(self, DATASETS, '--bumpversion', 'new', entity_dir=entity_dir)
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, '')))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_PUSH % (DATASETS, DATASET_NAME)))
    self.assertTrue(os.path.exists(workspace_with_dir))
    # The same nested layout must appear under the ml-git metadata directory.
    self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata', entity_dir)))
def add_metadata(self, basepath, filepath):
    """Copy *filepath* (relative to *basepath*) into this spec's metadata area in the index."""
    log.debug('Add file [%s] to ml-git index' % filepath, class_name=MULTI_HASH_CLASS_NAME)
    source = os.path.join(basepath, filepath)
    metadata_dir = os.path.join(self._path, 'metadata', self._spec)
    ensure_path_exists(metadata_dir)
    destination = os.path.join(metadata_dir, filepath)
    if os.path.exists(destination):
        # Unlink first — presumably so a hard-linked destination is not written
        # through in place; confirm against the storage layout.
        os.unlink(destination)
    shutil.copy2(source, destination)
def commit_metadata(self, index_path, tags, commit_msg, changed_files, mutability, ws_path):
    """Commit the staged metadata for this spec and tag the resulting commit.

    Returns (tag, sha) as strings, or (None, None) on any failure: unresolved
    metadata path, nothing to commit, commit error, or an already-existing tag.
    """
    spec_file = os.path.join(index_path, 'metadata', self._spec, self._spec + SPEC_EXTENSION)
    full_metadata_path, entity_sub_path, metadata = self._full_metadata_path(
        spec_file)
    log.debug(output_messages['DEBUG_METADATA_PATH'] % full_metadata_path, class_name=METADATA_CLASS_NAME)
    if full_metadata_path is None:
        return None, None
    elif entity_sub_path is None:
        return None, None
    ensure_path_exists(full_metadata_path)
    ret = self.__commit_manifest(full_metadata_path, index_path, changed_files, mutability)
    if ret is False:
        log.info(output_messages['INFO_NO_FILES_COMMIT_FOR'] % self._spec, class_name=METADATA_CLASS_NAME)
        return None, None
    try:
        self.__commit_metadata(full_metadata_path, index_path, metadata, tags, ws_path)
    except Exception:
        return None, None
    # generates a tag to associate to the commit
    tag = self.metadata_tag(metadata)
    # check if tag already exists in the ml-git repository
    tags = self._tag_exists(tag)
    if len(tags) > 0:
        log.error(output_messages[
            'ERROR_TAG_ALREADY_EXISTS_CONSIDER_USER_VERSION'] % tag,
            class_name=METADATA_CLASS_NAME)
        for t in tags:
            log.error(output_messages['ERROR_METADATA_MESSAGE'] % t)
        return None, None
    if commit_msg is not None and len(commit_msg) > 0:
        msg = commit_msg
    else:
        # generates a commit message
        msg = self.metadata_message(metadata)
    log.debug(output_messages['DEBUG_COMMIT_MESSAGE'] % msg, class_name=METADATA_CLASS_NAME)
    sha = self.commit(entity_sub_path, msg)
    self.tag_add(tag)
    return str(tag), str(sha)
def add_metadata(self, basepath, filepath):
    """Copy *filepath* (relative to *basepath*) into this spec's metadata area in the index."""
    log.debug(output_messages['DEBUG_ADD_FILE'] % filepath, class_name=MULTI_HASH_CLASS_NAME)
    source = os.path.join(basepath, filepath)
    metadata_dir = os.path.join(self._path, 'metadata', self._spec)
    ensure_path_exists(metadata_dir)
    destination = os.path.join(metadata_dir, filepath)
    if os.path.exists(destination):
        # Unlink first — presumably so a hard-linked destination is not written
        # through in place; confirm against the storage layout.
        os.unlink(destination)
    shutil.copy2(source, destination)
def _store_chunk(self, filename, data):
    """Write one chunk under its hash path.

    Returns False if the chunk already exists, True after writing, and None
    (implicitly) when *data* is None.
    """
    chunk_path = self._get_hashpath(filename)
    ensure_path_exists(os.path.dirname(chunk_path))
    if os.path.isfile(chunk_path):
        log.debug('Chunk [%s]-[%d] already exists' % (filename, len(data)),
                  class_name=HASH_FS_CLASS_NAME)
        return False
    if data is not None:
        log.debug('Add chunk [%s]-[%d]' % (filename, len(data)),
                  class_name=HASH_FS_CLASS_NAME)
        with open(chunk_path, 'wb') as chunk_file:
            chunk_file.write(data)
        return True
def test_get_spec_content_from_ref(self):
    """The spec content read back from a git ref must equal the spec file on disk."""
    mdpath = os.path.join(self.test_dir, 'mdata', DATASETS, 'metadata')
    specpath = 'dataset-ex'
    m = Metadata(specpath, self.test_dir, config, DATASETS)
    m.init()
    ensure_path_exists(os.path.join(mdpath, specpath))
    # FIX: was os.path.join(mdpath, specpath) + '/dataset-ex.spec' — build the
    # whole path with os.path.join instead of mixing in a literal separator.
    spec_metadata_path = os.path.join(mdpath, specpath, 'dataset-ex.spec')
    shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)
    sha = m.commit(spec_metadata_path, specpath)
    tag = m.tag_add(sha)
    path = 'dataset-ex/dataset-ex.spec'
    content = yaml_load_str(m._get_spec_content_from_ref(tag.commit, path))
    spec_file = yaml_load(spec_metadata_path)
    self.assertEqual(content, spec_file)
def test_11_add_with_bumpversion_in_older_tag(self):
    """Adding with --bumpversion after checking out an older tag must bump from the
    latest pushed version (here 2 -> 3), not from the checked-out one.

    NOTE(review): `_push_tag_to_repositroy` has a typo ('repositroy'); it is
    defined elsewhere in this test class and only referenced here.
    """
    repo_type = DATASETS
    entity_name = '{}-ex'.format(repo_type)
    init_repository(repo_type, self)
    entity_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    ensure_path_exists(entity_path)
    # Push two versions, then go back to version 1.
    self._push_tag_to_repositroy(repo_type, entity_path, 'first_tag')
    self._push_tag_to_repositroy(repo_type, entity_path, 'second_tag')
    self.assertNotIn(
        ERROR_MESSAGE,
        check_output(MLGIT_CHECKOUT % (repo_type, entity_name + ' --version=1')))
    self._check_spec_version(repo_type, 1)
    # Bumpversion must produce version 3 (latest + 1), not 2.
    add_file(self, repo_type, '--bumpversion', 'third_tag')
    self._check_spec_version(repo_type, 3)
def link(self, key, srcfile, force=True):
    """Link *srcfile* into the store under *key*.

    If the key already exists and *force* is True, the workspace file is replaced
    by a hard link to the stored object; otherwise the source is linked into the
    store.
    """
    stored_path = self._get_hashpath(key)
    ensure_path_exists(os.path.dirname(stored_path))
    log.debug(output_messages['DEBUG_LINK_FROM_TO'] % (srcfile, key), class_name=HASH_FS_CLASS_NAME)
    if os.path.exists(stored_path):
        if force is True:
            try:
                # Replace the workspace file with a hard link to the stored object.
                set_write_read(srcfile)
                os.unlink(srcfile)
                os.link(stored_path, srcfile)
            except FileNotFoundError as e:
                log.debug(str(e), class_name=HASH_FS_CLASS_NAME)
                raise e
        return
    os.link(srcfile, stored_path)