def test_add_file_end_to_end(file_name):
    """End-to-end check: ``meta_add`` stores file-level metadata in a fresh repo.

    Writes a file-type metadata record to *file_name*, adds it to a temporary
    git-backed metadata store, and verifies the record is retrievable through
    the file tree of the metadata root record.
    """
    test_path = "d_0/d_0.0/f_0.0.0"
    # Use a context manager so the handle is closed deterministically;
    # the original `json.dump(..., open(file_name, "tw"))` leaked it.
    with open(file_name, "tw") as f:
        json.dump({
            **metadata_template,
            "type": "file",
            "path": test_path
        }, f)

    with tempfile.TemporaryDirectory() as temp_dir:
        git_repo = GitRepo(temp_dir)

        res = meta_add(metadata=file_name, metadata_store=git_repo.path)
        # Exactly one result, and it must be a file record, not a dataset one.
        assert_result_count(res, 1)
        assert_result_count(res, 1, type='file')
        assert_result_count(res, 0, type='dataset')

        # Verify file level metadata was added
        tree_version_list, uuid_set, mrr = _get_top_nodes(
            git_repo,
            UUID(metadata_template["dataset_id"]),
            metadata_template["dataset_version"])

        file_tree = mrr.get_file_tree()
        assert_is_not_none(file_tree)
        assert_true(test_path in file_tree)

        metadata = file_tree.get_metadata(MetadataPath(test_path))
        metadata_content = _get_metadata_content(metadata)
        eq_(metadata_content, metadata_template["extracted_metadata"])
def _get_metadata_content(metadata):
    """Return the single metadata payload contained in *metadata*.

    Asserts that exactly one extractor run (matching the template's
    extractor name) with exactly one instance is present, and returns
    that instance's ``metadata_content``.
    """
    assert_is_not_none(metadata)

    metadata_instances = tuple(metadata.extractor_runs())
    eq_(len(metadata_instances), 1)

    extractor_name, extractor_runs = metadata_instances[0]
    eq_(extractor_name, metadata_template["extractor_name"])

    instances = tuple(extractor_runs.get_instances())
    # BUG FIX: the original called `assert_true(len(instances), 1)`, which
    # treats `1` as the failure *message* and only checks truthiness of the
    # length — so any non-empty tuple passed. Use an equality assertion.
    eq_(len(instances), 1)

    return instances[0].metadata_content
def test_symlinked_dataset_properties(repo1, repo2, repo3, non_repo, symlink): ds = Dataset(repo1).create() # now, let ds be a symlink and change that symlink to point to different # things: ar2 = AnnexRepo(repo2) ar3 = AnnexRepo(repo3) assert_true(os.path.isabs(non_repo)) os.symlink(repo1, symlink) ds_link = Dataset(symlink) assert_is(ds_link.repo, ds.repo) # same Repo instance assert_is_not(ds_link, ds) # but not the same Dataset instance assert_is(ds_link.config, ds.repo.config) assert_true(ds_link._cfg_bound) assert_is_not_none(ds_link.id) # same id, although different Dataset instance: assert_equal(ds_link.id, ds.id) os.unlink(symlink) os.symlink(repo2, symlink) assert_is(ds_link.repo, ar2) # same Repo instance assert_is(ds_link.config, ar2.config) assert_true(ds_link._cfg_bound) # id is None again, since this repository is an annex but there was no # Dataset.create() called yet. assert_is_none(ds_link.id) os.unlink(symlink) os.symlink(repo3, symlink) assert_is(ds_link.repo, ar3) # same Repo instance assert_is(ds_link.config, ar3.config) assert_true(ds_link._cfg_bound) # id is None again, since this repository is an annex but there was no # Dataset.create() called yet. assert_is_none(ds_link.id) os.unlink(symlink) os.symlink(non_repo, symlink) assert_is_none(ds_link.repo) assert_is_not(ds_link.config, ar3.config) assert_false(ds_link._cfg_bound) assert_is_none(ds_link.id)
def test_subdataset_add_file_end_to_end(file_name):
    """End-to-end check: ``meta_add`` stores file metadata for a sub-dataset.

    Writes a file-type metadata record carrying the additional root-dataset
    keys, adds it to a temporary git-backed metadata store, and verifies the
    record is reachable via the root dataset's dataset tree.
    """
    test_path = "d_1/d_1.0/f_1.0.0"
    # Use a context manager so the handle is closed deterministically;
    # the original `json.dump(..., open(file_name, "tw"))` leaked it.
    with open(file_name, "tw") as f:
        json.dump({
            **metadata_template,
            **additional_keys_template,
            "type": "file",
            "path": test_path
        }, f)

    with tempfile.TemporaryDirectory() as temp_dir:
        git_repo = GitRepo(temp_dir)

        res = meta_add(metadata=file_name, metadata_store=git_repo.path)
        # Exactly one result, and it must be a file record, not a dataset one.
        assert_result_count(res, 1)
        assert_result_count(res, 1, type='file')
        assert_result_count(res, 0, type='dataset')

        # Verify dataset level metadata was added
        root_dataset_id = UUID(additional_keys_template["root_dataset_id"])
        root_dataset_version = additional_keys_template["root_dataset_version"]
        dataset_tree_path = MetadataPath(
            additional_keys_template["dataset_path"])

        tree_version_list, uuid_set, mrr = _get_top_nodes(
            git_repo,
            root_dataset_id,
            root_dataset_version)

        # Descend from the root dataset's tree to the sub-dataset's
        # metadata root record and check its identity.
        _, dataset_tree = tree_version_list.get_dataset_tree(
            root_dataset_version)

        mrr = dataset_tree.get_metadata_root_record(dataset_tree_path)
        eq_(mrr.dataset_identifier, UUID(metadata_template["dataset_id"]))

        file_tree = mrr.get_file_tree()
        assert_is_not_none(file_tree)
        assert_true(test_path in file_tree)

        metadata = file_tree.get_metadata(MetadataPath(test_path))
        metadata_content = _get_metadata_content(metadata)
        eq_(metadata_content, metadata_template["extracted_metadata"])
def _get_top_nodes(git_repo, dataset_id, dataset_version):
    """Fetch the top-level metadata nodes for a dataset version.

    Asserts that the tree-version list, UUID set, and metadata root record
    were all created, then returns them as a 3-tuple.
    """
    version_list, id_set, root_record = \
        get_top_nodes_and_metadata_root_record(
            "git",
            git_repo.path,
            dataset_id,
            dataset_version,
            MetadataPath(""))

    # Ensure that metadata was created: none of the nodes may be missing.
    for node in (version_list, id_set, root_record):
        assert_is_not_none(node)

    return version_list, id_set, root_record
def test_property_reevaluation(repo1):
    """Check Dataset property re-evaluation across create/remove/create.

    ``repo``, ``config``, ``_cfg_bound`` and ``id`` must be recomputed at
    each lifecycle transition: unbound before create(), bound to the repo's
    config after, reset after remove(), and freshly bound (with a new id)
    after re-creation.
    """
    # Before create(): no repo, unbound user/system-level config, no id.
    ds = Dataset(repo1)
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    first_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after creation, we have `repo`, and `config` was reevaluated to point
    # to the repo's config:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    second_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is(ds.config, ds.repo.config)
    assert_is_not(first_config, second_config)
    assert_is_not_none(ds.id)
    first_id = ds.id

    ds.remove()
    # repo is gone, and config is again reevaluated to only provide user/system
    # level config:
    assert_false(lexists(ds.path))
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    third_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_not(second_config, third_config)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after recreation everything is sane again:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    assert_is(ds.config, ds.repo.config)
    forth_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is_not(third_config, forth_config)
    assert_is_not_none(ds.id)
    # Recreation mints a fresh dataset id, distinct from the first one.
    assert_not_equal(ds.id, first_id)