def test_addurls_invalid_input(self, path):
    ds = Dataset(path).create(force=True)
    in_file = op.join(path, "in")
    for in_type in ["csv", "json"]:
        with assert_raises(IncompleteResultsError) as exc:
            ds.addurls(in_file, "{url}", "{name}", input_type=in_type)
        assert_in("Failed to read", str(exc.exception))

def test_addurls_url_on_collision_error_if_different(self=None, path=None):
    ds = Dataset(path).create(force=True)
    data = [self.data[0].copy(), self.data[0].copy()]
    data[0]["some_metadata"] = "1"
    data[1]["some_metadata"] = "2"

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}", on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}",
                       on_collision="error-if-different",
                       on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}", exclude_autometa="*",
                   on_collision="error-if-different")
    ok_exists(op.join(ds.path, "a"))

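# The tests in this file lean on fixtures -- self.data, self.json_file,
# self.url -- that are defined elsewhere in the suite. Below is a minimal,
# hypothetical sketch of such a fixture, inferred only from the field names
# ("url", "name", "subdir") and values ("a"/"b"/"c", "foo"/"bar"/"foo",
# "udir/*.dat") that the assertions in this file rely on; the base URL and
# the helper function are stand-ins, not part of the original suite.
import json
import tempfile

EXAMPLE_URL = "http://example.test/"  # hypothetical base URL
EXAMPLE_DATA = [
    {"url": EXAMPLE_URL + "udir/a.dat", "name": "a", "subdir": "foo"},
    {"url": EXAMPLE_URL + "udir/b.dat", "name": "b", "subdir": "bar"},
    {"url": EXAMPLE_URL + "udir/c.dat", "name": "c", "subdir": "foo"},
]


def write_example_json_file():
    """Dump EXAMPLE_DATA to a temporary JSON file and return its path."""
    with tempfile.NamedTemporaryFile("w", suffix=".json",
                                     delete=False) as fh:
        json.dump(EXAMPLE_DATA, fh)
        return fh.name
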
def test_addurls_url_parts(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{_url0}/{_url_basename}")
        for fname in ["a.dat", "b.dat", "c.dat"]:
            ok_exists(op.join("udir", fname))

def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)

    json_file = "links.json"
    with open(op.join(ds.path, json_file), "w") as jfh:
        json.dump([{"url": "URL/a.dat", "name": "a", "subdir": "foo"},
                   {"url": "URL/b.dat", "name": "b", "subdir": "bar"},
                   {"url": "URL/c.dat", "name": "c", "subdir": "foo"}],
                  jfh)

    ds.save(message="setup")

    with swallow_logs(new_level=logging.INFO) as cml:
        ds.addurls(json_file,
                   "{url}",
                   "{subdir}//{_url_filename_root}",
                   dry_run=True)

        for dir_ in ["foo", "bar"]:
            assert_in("Would create a subdataset at {}".format(dir_),
                      cml.out)
        assert_in("Would download URL/a.dat to {}".format(
            os.path.join(path, "foo", "BASE")),
            cml.out)
        assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]),
                  cml.out)

def test_addurls_version(self=None, path=None):
    ds = Dataset(path).create(force=True)

    def version_fn(url):
        if url.endswith("b.dat"):
            raise ValueError("Scheme error")
        return url + ".v1"

    with patch("datalad.local.addurls.get_versioned_url", version_fn):
        with swallow_logs(new_level=logging.WARNING) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       version_urls=True, result_renderer='disabled')
            assert_in("b.dat", str(cml.out))

    names = ["a", "c"]
    for fname in names:
        ok_exists(os.path.join(path, fname))

    whereis = ds.repo.whereis(names, output="full")
    for fname, info in whereis.items():
        eq_(info[WEB_SPECIAL_REMOTE_UUID]['urls'],
            ["{}udir/{}.dat.v1".format(self.url, fname)])

def test_addurls_unknown_placeholder(path):
    ds = Dataset(path).create(force=True)
    # Close but wrong URL placeholder
    with assert_raises(IncompleteResultsError) as exc:
        ds.addurls("in.csv", "{link}", "{abcd}", dry_run=True)
    assert_in("linky", str(exc.exception))
    # Close but wrong file name placeholder
    with assert_raises(IncompleteResultsError) as exc:
        ds.addurls("in.csv", "{linky}", "{abc}", dry_run=True)
    assert_in("abcd", str(exc.exception))
    # Out-of-bounds index.
    with assert_raises(IncompleteResultsError) as exc:
        ds.addurls("in.csv", "{linky}", "{3}", dry_run=True)
    assert_in("index", str(exc.exception))
    # Suggestions also work for automatic file name placeholders
    with assert_raises(IncompleteResultsError) as exc:
        ds.addurls("in.csv", "{linky}", "{_url_hostnam}", dry_run=True)
    assert_in("_url_hostname", str(exc.exception))
    # ... though if you whiff on the beginning prefix, we don't suggest
    # anything because we decide to generate those fields based on
    # detecting the prefix.
    with assert_raises(IncompleteResultsError) as exc:
        ds.addurls("in.csv", "{linky}", "{_uurl_hostnam}", dry_run=True)
    assert_not_in("_url_hostname", str(exc.exception))

def test_addurls_url_parts(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{_url0}/{_url_basename}")
        for fname in ["udir/a.dat", "udir/b.dat", "udir/c.dat"]:
            ok_exists(fname)

def check_addurls_from_key(self, key_arg, expected_backend, fake_dates,
                           path):
    ds = Dataset(path).create(force=True, fake_dates=fake_dates)
    if OLD_EXAMINEKEY and ds.repo.is_managed_branch():
        raise SkipTest("Adjusted branch functionality requires "
                       "more recent `git annex examinekey`")
    ds.addurls(self.json_file, "{url}", "{name}", exclude_autometa="*",
               key=key_arg, result_renderer='disabled')
    repo = ds.repo
    repo_path = ds.repo.pathobj
    paths = [repo_path / x for x in "ac"]
    annexinfo = repo.get_content_annexinfo(eval_availability=True)
    for path in paths:
        pstat = annexinfo[path]
        eq_(pstat["backend"], expected_backend)
        assert_false(pstat["has_content"])

    get_res = ds.get(paths, result_renderer='disabled',
                     on_failure="ignore")
    assert_result_count(get_res, 2, action="get", status="ok")

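# A hedged usage sketch for the check_addurls_from_key helper above. The
# "MD5-s{size}--{md5sum}" key format and the "MD5" backend pairing are
# borrowed from test_addurls_row_missing_key_fields below; wiring them up
# as a single check like this is an assumption about how the suite
# parametrizes the helper, not a test taken from the original file.
def check_addurls_from_key_md5_example(self, path):
    self.check_addurls_from_key(key_arg="MD5-s{size}--{md5sum}",
                                expected_backend="MD5",
                                fake_dates=False,
                                path=path)
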
def test_addurls_row_missing_key_fields(self=None, path=None):
    ds = Dataset(path).create(force=True)
    if OLD_EXAMINEKEY and ds.repo.is_managed_branch():
        raise SkipTest("Adjusted branch functionality requires "
                       "more recent `git annex examinekey`")
    data = deepcopy(self.data)
    for row in data:
        if row["name"] == "b":
            del row["md5sum"]
            break
    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}", exclude_autometa="*",
                   key="MD5-s{size}--{md5sum}",
                   result_renderer='disabled')
    repo = ds.repo
    repo_path = ds.repo.pathobj
    paths = [repo_path / x for x in "ac"]
    annexinfo = repo.get_content_annexinfo(eval_availability=True)
    for path in paths:
        pstat = annexinfo[path]
        eq_(pstat["backend"], "MD5")
        assert_false(pstat["has_content"])

def test_addurls_url_parts(self, path):
    ds = Dataset(path).rev_create(force=True)
    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{_url0}/{_url_basename}")
        for fname in ["a.dat", "b.dat", "c.dat"]:
            ok_exists(op.join("udir", fname))

def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        json_file = "links.json"
        with open(json_file, "w") as jfh:
            json.dump([{"url": "URL/a.dat", "name": "a", "subdir": "foo"},
                       {"url": "URL/b.dat", "name": "b", "subdir": "bar"},
                       {"url": "URL/c.dat", "name": "c", "subdir": "foo"}],
                      jfh)

        ds.save(message="setup")

        with swallow_logs(new_level=logging.INFO) as cml:
            ds.addurls(json_file,
                       "{url}",
                       "{subdir}//{_url_filename_root}",
                       dry_run=True)

            for dir_ in ["foo", "bar"]:
                assert_in("Would create a subdataset at {}".format(dir_),
                          cml.out)
            assert_in("Would download URL/a.dat to {}".format(
                os.path.join(path, "foo", "BASE")),
                cml.out)
            assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]),
                      cml.out)

def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            hexsha_before = ds.repo.get_hexsha()
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)
            hexsha_after = ds.repo.get_hexsha()

            for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                ok_exists(fname.format(label))

            assert_true(save ^ (hexsha_before == hexsha_after))
            assert_true(save ^ ds.repo.dirty)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.add(".")
        eq_(set(subdatasets(ds, recursive=True, result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)

def test_addurls_url_filename(self=None, path=None):
    ds = Dataset(path).create(force=True)
    ds.addurls(self.json_file, "{url}", "{_url0}/{_url_filename}",
               result_renderer='disabled')
    for fname in ["a.dat", "b.dat", "c.dat"]:
        ok_exists(op.join(ds.path, "udir", fname))

def test_addurls_nonannex_repo(path=None):
    ds = Dataset(path).create(force=True, annex=False)
    with assert_raises(IncompleteResultsError) as raised:
        ds.addurls("dummy_arg0", "dummy_arg1", "dummy_arg2",
                   result_renderer='disabled')
    assert_in("not an annex repo", str(raised.value))

def test_addurls_dropped_urls(self=None, path=None):
    ds = Dataset(path).create(force=True)
    with swallow_logs(new_level=logging.WARNING) as cml:
        ds.addurls(self.json_file, "", "{subdir}//{name}",
                   result_renderer='disabled')
    assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                 str(cml.out))

def check_addurls_stdin_input(self, input_text, input_type, path):
    ds = Dataset(path).create(force=True)
    with patch("sys.stdin", new=StringIO(input_text)):
        ds.addurls("-", "{url}", "{name}", input_type=input_type,
                   result_renderer='disabled')
    for fname in ["a", "b", "c"]:
        ok_exists(op.join(ds.path, fname))

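# Hedged examples of the input_text that check_addurls_stdin_input could
# receive. The JSON form mirrors the json.dumps(...) payloads patched into
# sys.stdin elsewhere in this file; the CSV header and column order are an
# assumption, since this file never shows a literal CSV payload.
EXAMPLE_JSON_INPUT = json.dumps(EXAMPLE_DATA)
EXAMPLE_CSV_INPUT = (
    "name,subdir,url\n"
    "a,foo,http://example.test/udir/a.dat\n"
    "b,bar,http://example.test/udir/b.dat\n"
    "c,foo,http://example.test/udir/c.dat\n"
)
# e.g. self.check_addurls_stdin_input(EXAMPLE_JSON_INPUT, "json", path)
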
def test_addurls_invalid_input(self=None, path=None):
    ds = Dataset(path).create(force=True)
    in_file = op.join(path, "in")
    for in_type in au.INPUT_TYPES:
        with assert_raises(IncompleteResultsError) as exc:
            ds.addurls(in_file, "{url}", "{name}", input_type=in_type,
                       result_renderer='disabled')
        assert_in("Failed to read", str(exc.value))

def test_addurls_subdataset(self=None, path=None):
    ds = Dataset(path).create(force=True)

    for save in True, False:
        label = "save" if save else "nosave"
        with swallow_outputs() as cmo:
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save, cfg_proc=["yoda"])
            # The custom result renderer transforms the subdataset
            # action=create results into something more informative than
            # "create(ok): . (dataset)"...
            assert_in("create(ok): foo-{} (dataset)".format(label),
                      cmo.out)
            # ... and that doesn't lose the standard summary.
            assert_in("create (ok: 2)", cmo.out)

        subdirs = [op.join(ds.path, "{}-{}".format(d, label))
                   for d in ["foo", "bar"]]
        subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

        for subds, fnames in subdir_files.items():
            for fname in fnames:
                ok_exists(op.join(subds, fname))
            # cfg_proc was applied to the generated subdatasets.
            ok_exists(op.join(subds, "code"))
        if save:
            assert_repo_status(path)
        else:
            # The datasets are created but not saved (since we asked
            # not to save),
            assert_repo_status(path, untracked=subdirs)
            # and the downloaded files aren't saved either.
            for subds, fnames in subdir_files.items():
                assert_repo_status(subds, added=fnames)

    # Now save the "--nosave" changes and check that we have
    # all the subdatasets.
    ds.save()
    eq_(set(subdatasets(dataset=ds, recursive=True,
                        result_xfm="relpaths")),
        {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

    # We don't try to recreate existing subdatasets.
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}",
                   result_renderer='disabled')
        assert_in("Not creating subdataset at existing path", cml.out)

def test_addurls_repindex(self, path):
    ds = Dataset(path).create(force=True)

    with assert_raises(IncompleteResultsError) as raised:
        ds.addurls(self.json_file, "{url}", "{subdir}")
    assert_in("There are file name collisions", str(raised.exception))

    ds.addurls(self.json_file, "{url}", "{subdir}-{_repindex}")

    for fname in ["foo-0", "bar-0", "foo-1"]:
        ok_exists(op.join(ds.path, fname))

def test_addurls_repindex(self, path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        with assert_raises(IncompleteResultsError) as raised:
            ds.addurls(self.json_file, "{url}", "{subdir}")
        assert_in("There are file name collisions",
                  str(raised.exception))

        ds.addurls(self.json_file, "{url}", "{subdir}-{_repindex}")

        for fname in ["foo-0", "bar-0", "foo-1"]:
            ok_exists(fname)

def test_addurls_metafail(self, path):
    ds = Dataset(path).create(force=True)

    # Force failure by passing a non-existent file name to annex.
    fn = ds.repo.set_metadata_

    def set_meta(_, **kwargs):
        for i in fn("wreaking-havoc-and-such", **kwargs):
            yield i

    with chpwd(path), patch.object(ds.repo, 'set_metadata_', set_meta):
        with assert_raises(IncompleteResultsError):
            ds.addurls(self.json_file, "{url}", "{name}")

def test_addurls_metafail(self, path):
    ds = Dataset(path).create(force=True)

    # Force failure by passing a non-existent file name to annex.
    fn = ds.repo.set_metadata_

    def set_meta(_, **kwargs):
        for i in fn("wreaking-havoc-and-such", **kwargs):
            yield i

    with patch.object(ds.repo, 'set_metadata_', set_meta):
        with assert_raises(IncompleteResultsError):
            ds.addurls(self.json_file, "{url}", "{name}")

def test_addurls_from_key_invalid_format(self, path):
    ds = Dataset(path).create(force=True)
    for fmt in ["{name}-which-has-no-double-dash",
                # Invalid hash length.
                "MD5-s{size}--{md5sum}a",
                # Invalid hash content.
                "MD5-s{size}--" + 32 * "q"]:
        with assert_raises(IncompleteResultsError):
            ds.addurls(self.json_file, "{url}", "{name}",
                       key=fmt, exclude_autometa="*")

def test_addurls_deeper(self, path):
    ds = Dataset(path).create(force=True)
    ds.addurls(self.json_file,
               "{url}",
               "{subdir}//adir/{subdir}-again//other-ds//bdir/{name}")
    eq_(set(ds.subdatasets(recursive=True, result_xfm="relpaths")),
        {"foo", "bar",
         op.join("foo", "adir", "foo-again"),
         op.join("bar", "adir", "bar-again"),
         op.join("foo", "adir", "foo-again", "other-ds"),
         op.join("bar", "adir", "bar-again", "other-ds")})
    ok_exists(os.path.join(ds.path, "foo", "adir", "foo-again",
                           "other-ds", "bdir", "a"))

def test_addurls_repindex(self=None, path=None):
    ds = Dataset(path).create(force=True)

    with assert_raises(IncompleteResultsError) as raised:
        ds.addurls(self.json_file, "{url}", "{subdir}",
                   result_renderer='disabled')
    assert_in("collided", str(raised.value))

    ds.addurls(self.json_file, "{url}", "{subdir}-{_repindex}",
               result_renderer='disabled')

    for fname in ["foo-0", "bar-0", "foo-1"]:
        ok_exists(op.join(ds.path, fname))

def test_addurls_no_rows(self, path):
    ds = Dataset(path).create(force=True)
    for fname in ["in.csv", "in.json"]:
        with swallow_logs(new_level=logging.WARNING) as cml:
            assert_in_results(
                ds.addurls(fname, "{url}", "{name}"),
                action="addurls",
                status="notneeded")
            cml.assert_logged("No rows", regex=False)

def test_addurls_url_special_key_fail(self, path):
    ds = Dataset(path).create(force=True)
    res1 = ds.addurls(self.json_file, "{url}", "{_url4}/{_url_filename}",
                      on_failure="ignore")
    assert_in("Special key", res1[0]["message"])

    data = self.data.copy()[:1]
    data[0]["url"] = urlparse(data[0]["url"]).netloc
    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        res2 = ds.addurls("-", "{url}", "{_url_basename}",
                          on_failure="ignore")
    assert_in("Special key", res2[0]["message"])

def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)

    for save in True, False:
        label = "save" if save else "nosave"
        ds.addurls(self.json_file, "{url}",
                   "{subdir}-" + label + "//{name}",
                   save=save, cfg_proc=["yoda"])

        subdirs = [op.join(ds.path, "{}-{}".format(d, label))
                   for d in ["foo", "bar"]]
        subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

        for subds, fnames in subdir_files.items():
            for fname in fnames:
                ok_exists(op.join(subds, fname))
            # cfg_proc was applied to the generated subdatasets.
            ok_exists(op.join(subds, "code"))
        if save:
            assert_repo_status(path)
        else:
            # The datasets are created and saved ...
            assert_repo_status(path, modified=subdirs)
            # ... but the downloaded files aren't.
            for subds, fnames in subdir_files.items():
                assert_repo_status(subds, added=fnames)

    # Now save the "--nosave" changes and check that we have
    # all the subdatasets.
    ds.save()
    eq_(set(subdatasets(dataset=ds, recursive=True,
                        result_xfm="relpaths")),
        {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

    # We don't try to recreate existing subdatasets.
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
        assert_in("Not creating subdataset at existing path", cml.out)

def test_drop_after(self, path):
    ds = Dataset(path).create(force=True)
    ds.repo.set_gitattributes([('a*', {'annex.largefiles': 'nothing'})])
    # Make some files go to git, so we can test that we do not blow up
    # while trying to drop what is in git rather than in the annex.
    res = ds.addurls(self.json_file, '{url}', '{name}', drop_after=True)

    # a, b, c, even though a goes to git
    assert_result_count(res, 3, action='addurl', status='ok')
    # b, c
    assert_result_count(res, 2, action='drop', status='ok')

def test_addurls(self, path):
    ds = Dataset(path).create(force=True)

    def get_annex_commit_counts():
        return int(
            ds.repo.repo.git.rev_list("--count", "git-annex").strip())

    n_annex_commits = get_annex_commit_counts()

    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{name}")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(fname)

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta,
                              {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        if not os.environ.get('DATALAD_FAKE__DATES'):
            # We should have two new commits on the git-annex branch: one
            # for the added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       ifexists="overwrite")
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(
            ds.addurls(self.json_file, "{url}", "{name}",
                       ifexists="skip"),
            action="addurls",
            status="notneeded")

        # Adding to already existing links works, as long as the content
        # is the same.
        ds.addurls(self.json_file, "{url}", "{name}")

        # But it fails if something has changed.
        ds.unlock("a")
        with open("a", "w") as ofh:
            ofh.write("changed")
        ds.save("a")

        assert_raises(IncompleteResultsError,
                      ds.addurls,
                      self.json_file, "{url}", "{name}")

def test_addurls_url_on_collision_choose(self=None, path=None):
    ds = Dataset(path).create(force=True)
    data = deepcopy(self.data)
    for row in data:
        row["name"] = "a"

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}", on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}",
                       on_collision="error-if-different",
                       on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}-first",
                   on_collision="take-first")
    ok_file_has_content(op.join(ds.path, "a-first"), "a content",
                        strip=True)

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}-last",
                   on_collision="take-last")
    ok_file_has_content(op.join(ds.path, "a-last"), "c content",
                        strip=True)

def test_addurls_version(self, path):
    ds = Dataset(path).create(force=True)

    def version_fn(url):
        if url.endswith("b.dat"):
            raise ValueError("Scheme error")
        return url + ".v1"

    with patch("datalad.plugin.addurls.get_versioned_url", version_fn):
        with swallow_logs(new_level=logging.WARNING) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       version_urls=True)
            assert_in("b.dat", str(cml.out))

    names = ["a", "c"]
    for fname in names:
        ok_exists(os.path.join(path, fname))

    whereis = ds.repo.whereis(names, output="full")
    for fname, info in whereis.items():
        eq_(info[ds.repo.WEB_UUID]['urls'],
            ["{}udir/{}.dat.v1".format(self.url, fname)])

def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)

            subdirs = ["{}-{}".format(d, label) for d in ["foo", "bar"]]
            subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

            for subds, fnames in subdir_files.items():
                for fname in fnames:
                    ok_exists(op.join(subds, fname))
            if save:
                assert_repo_status(path)
            else:
                # The datasets are created and saved ...
                assert_repo_status(path, modified=subdirs)
                # ... but the downloaded files aren't.
                for subds, fnames in subdir_files.items():
                    assert_repo_status(subds, added=fnames)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.save()
        eq_(set(subdatasets(dataset=ds, recursive=True,
                            result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)

def test_addurls(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{name}")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(fname)

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta,
                              {"subdir": [subdir], "name": [fname]})

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       ifexists="overwrite")
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(
            ds.addurls(self.json_file, "{url}", "{name}",
                       ifexists="skip"),
            action="addurls",
            status="notneeded")

        # Adding to already existing links works, as long as the content
        # is the same.
        ds.addurls(self.json_file, "{url}", "{name}")

        # But it fails if something has changed.
        ds.unlock("a")
        with open("a", "w") as ofh:
            ofh.write("changed")
        ds.add("a")

        assert_raises(IncompleteResultsError,
                      ds.addurls,
                      self.json_file, "{url}", "{name}")

def test_addurls_url_filename(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{_url0}/{_url_filename}")
        for fname in ["udir/a.dat", "udir/b.dat", "udir/c.dat"]:
            ok_exists(fname)

def test_addurls_url_filename(self, path):
    ds = Dataset(path).create(force=True)
    ds.addurls(self.json_file, "{url}", "{_url0}/{_url_filename}")
    for fname in ["a.dat", "b.dat", "c.dat"]:
        ok_exists(op.join(ds.path, "udir", fname))

def test_addurls_nonannex_repo(path):
    ds = Dataset(path).create(force=True, no_annex=True)
    with assert_raises(IncompleteResultsError) as raised:
        ds.addurls("dummy_arg0", "dummy_arg1", "dummy_arg2")
    assert_in("not an annex repo", str(raised.exception))

def test_addurls_nonannex_repo(path):
    ds = Dataset(path).create(force=True, annex=False)
    with assert_raises(IncompleteResultsError) as raised:
        ds.addurls("dummy_arg0", "dummy_arg1", "dummy_arg2")
    assert_in("not an annex repo", str(raised.exception))

def test_addurls_dropped_urls(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path), swallow_logs(new_level=logging.WARNING) as cml:
        ds.addurls(self.json_file, "", "{subdir}//{name}")
    assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                 str(cml.out))