Example #1
 def test_addurls_invalid_input(self, path):
     ds = Dataset(path).create(force=True)
     in_file = op.join(path, "in")
     for in_type in ["csv", "json"]:
         with assert_raises(IncompleteResultsError) as exc:
             ds.addurls(in_file, "{url}", "{name}", input_type=in_type)
         assert_in("Failed to read", str(exc.exception))
Example #2
    def test_addurls_url_on_collision_error_if_different(self=None, path=None):
        ds = Dataset(path).create(force=True)

        data = [self.data[0].copy(), self.data[0].copy()]
        data[0]["some_metadata"] = "1"
        data[1]["some_metadata"] = "2"

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_collision="error-if-different",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-",
                       "{url}",
                       "{name}",
                       exclude_autometa="*",
                       on_collision="error-if-different")
        ok_exists(op.join(ds.path, "a"))
Example #3
    def test_addurls_url_parts(self, path):
        ds = Dataset(path).create(force=True)
        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{_url0}/{_url_basename}")

            for fname in ["a.dat", "b.dat", "c.dat"]:
                ok_exists(op.join("udir", fname))
Example #4
def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)

    json_file = "links.json"
    with open(op.join(ds.path, json_file), "w") as jfh:
        json.dump([{
            "url": "URL/a.dat",
            "name": "a",
            "subdir": "foo"
        }, {
            "url": "URL/b.dat",
            "name": "b",
            "subdir": "bar"
        }, {
            "url": "URL/c.dat",
            "name": "c",
            "subdir": "foo"
        }], jfh)

    ds.save(message="setup")

    with swallow_logs(new_level=logging.INFO) as cml:
        ds.addurls(json_file,
                   "{url}",
                   "{subdir}//{_url_filename_root}",
                   dry_run=True)

        for dir_ in ["foo", "bar"]:
            assert_in("Would create a subdataset at {}".format(dir_), cml.out)
        assert_in(
            "Would download URL/a.dat to {}".format(
                os.path.join(path, "foo", "BASE")), cml.out)

        assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]), cml.out)
Example #5
    def test_addurls_version(self=None, path=None):
        ds = Dataset(path).create(force=True)

        def version_fn(url):
            if url.endswith("b.dat"):
                raise ValueError("Scheme error")
            return url + ".v1"

        with patch("datalad.local.addurls.get_versioned_url", version_fn):
            with swallow_logs(new_level=logging.WARNING) as cml:
                ds.addurls(self.json_file,
                           "{url}",
                           "{name}",
                           version_urls=True,
                           result_renderer='disabled')
                assert_in("b.dat", str(cml.out))

        names = ["a", "c"]
        for fname in names:
            ok_exists(os.path.join(path, fname))

        whereis = ds.repo.whereis(names, output="full")
        for fname, info in whereis.items():
            eq_(info[WEB_SPECIAL_REMOTE_UUID]['urls'],
                ["{}udir/{}.dat.v1".format(self.url, fname)])
Example #6
def test_addurls_unknown_placeholder(path):
    ds = Dataset(path).create(force=True)
    # Close but wrong URL placeholder
    with assert_raises(IncompleteResultsError) as exc:
        ds.addurls("in.csv", "{link}", "{abcd}", dry_run=True)
    assert_in("linky", str(exc.exception))
    # Close but wrong file name placeholder
    with assert_raises(IncompleteResultsError) as exc:
        ds.addurls("in.csv", "{linky}", "{abc}", dry_run=True)
    assert_in("abcd", str(exc.exception))
    # Out-of-bounds index.
    with assert_raises(IncompleteResultsError) as exc:
        ds.addurls("in.csv", "{linky}", "{3}", dry_run=True)
    assert_in("index", str(exc.exception))

    # Suggestions also work for automatic file name placeholders
    with assert_raises(IncompleteResultsError) as exc:
        ds.addurls("in.csv", "{linky}", "{_url_hostnam}", dry_run=True)
    assert_in("_url_hostname", str(exc.exception))
    # ... though if you get the leading prefix wrong, we don't suggest
    # anything, because those fields are only generated once the prefix
    # is detected.
    with assert_raises(IncompleteResultsError) as exc:
        ds.addurls("in.csv", "{linky}", "{_uurl_hostnam}", dry_run=True)
    assert_not_in("_url_hostname", str(exc.exception))
Example #7
    def test_addurls_url_parts(self, path):
        ds = Dataset(path).create(force=True)
        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{_url0}/{_url_basename}")

            for fname in ["udir/a.dat", "udir/b.dat", "udir/c.dat"]:
                ok_exists(fname)
Example #8
    def check_addurls_from_key(self, key_arg, expected_backend, fake_dates,
                               path):
        ds = Dataset(path).create(force=True, fake_dates=fake_dates)
        if OLD_EXAMINEKEY and ds.repo.is_managed_branch():
            raise SkipTest("Adjusted branch functionality requires "
                           "more recent `git annex examinekey`")
        ds.addurls(self.json_file,
                   "{url}",
                   "{name}",
                   exclude_autometa="*",
                   key=key_arg,
                   result_renderer='disabled')
        repo = ds.repo
        repo_path = ds.repo.pathobj
        paths = [repo_path / x for x in "ac"]

        annexinfo = repo.get_content_annexinfo(eval_availability=True)
        for path in paths:
            pstat = annexinfo[path]
            eq_(pstat["backend"], expected_backend)
            assert_false(pstat["has_content"])

        get_res = ds.get(paths,
                         result_renderer='disabled',
                         on_failure="ignore")
        assert_result_count(get_res, 2, action="get", status="ok")
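The key_arg values exercised here follow git-annex's key syntax, where an MD5 key has the shape MD5-s&lt;size-in-bytes&gt;--&lt;32-hex-digest&gt;. A small illustration with hypothetical content:

    import hashlib

    content = b"a content"
    row = {"size": len(content), "md5sum": hashlib.md5(content).hexdigest()}
    print("MD5-s{size}--{md5sum}".format(**row))  # MD5-s9--<32 hex chars>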
Example #9
    def test_addurls_row_missing_key_fields(self=None, path=None):
        ds = Dataset(path).create(force=True)
        if OLD_EXAMINEKEY and ds.repo.is_managed_branch():
            raise SkipTest("Adjusted branch functionality requires "
                           "more recent `git annex examinekey`")
        data = deepcopy(self.data)
        for row in data:
            if row["name"] == "b":
                del row["md5sum"]
                break
        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-",
                       "{url}",
                       "{name}",
                       exclude_autometa="*",
                       key="MD5-s{size}--{md5sum}",
                       result_renderer='disabled')

        repo = ds.repo
        repo_path = ds.repo.pathobj
        paths = [repo_path / x for x in "ac"]

        annexinfo = repo.get_content_annexinfo(eval_availability=True)
        for path in paths:
            pstat = annexinfo[path]
            eq_(pstat["backend"], "MD5")
            assert_false(pstat["has_content"])
Example #10
    def test_addurls_url_parts(self, path):
        ds = Dataset(path).rev_create(force=True)
        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{_url0}/{_url_basename}")

            for fname in ["a.dat", "b.dat", "c.dat"]:
                ok_exists(op.join("udir", fname))
Example #11
def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        json_file = "links.json"
        with open(json_file, "w") as jfh:
            json.dump([{"url": "URL/a.dat", "name": "a", "subdir": "foo"},
                       {"url": "URL/b.dat", "name": "b", "subdir": "bar"},
                       {"url": "URL/c.dat", "name": "c", "subdir": "foo"}],
                      jfh)

        ds.save(message="setup")

        with swallow_logs(new_level=logging.INFO) as cml:
            ds.addurls(json_file,
                       "{url}",
                       "{subdir}//{_url_filename_root}",
                       dry_run=True)

            for dir_ in ["foo", "bar"]:
                assert_in("Would create a subdataset at {}".format(dir_),
                          cml.out)
            assert_in(
                "Would download URL/a.dat to {}".format(
                    os.path.join(path, "foo", "BASE")),
                cml.out)

            assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]),
                      cml.out)
Example #12
    def test_addurls_subdataset(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            for save in True, False:
                label = "save" if save else "nosave"
                hexsha_before = ds.repo.get_hexsha()
                ds.addurls(self.json_file, "{url}",
                           "{subdir}-" + label + "//{name}",
                           save=save)
                hexsha_after = ds.repo.get_hexsha()

                for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                    ok_exists(fname.format(label))

                assert_true(save ^ (hexsha_before == hexsha_after))
                assert_true(save ^ ds.repo.dirty)

            # Now save the "--nosave" changes and check that we have
            # all the subdatasets.
            ds.add(".")
            eq_(set(subdatasets(ds, recursive=True,
                                result_xfm="relpaths")),
                {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

            # We don't try to recreate existing subdatasets.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
                assert_in("Not creating subdataset at existing path", cml.out)
Example #13
    def test_addurls_subdataset(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            for save in True, False:
                label = "save" if save else "nosave"
                hexsha_before = ds.repo.get_hexsha()
                ds.addurls(self.json_file,
                           "{url}",
                           "{subdir}-" + label + "//{name}",
                           save=save)
                hexsha_after = ds.repo.get_hexsha()

                for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                    ok_exists(fname.format(label))

                assert_true(save ^ (hexsha_before == hexsha_after))
                assert_true(save ^ ds.repo.dirty)

            # Now save the "--nosave" changes and check that we have
            # all the subdatasets.
            ds.add(".")
            eq_(set(subdatasets(ds, recursive=True, result_xfm="relpaths")),
                {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

            # We don't try to recreate existing subdatasets.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
                assert_in("Not creating subdataset at existing path", cml.out)
Example #14
 def test_addurls_url_filename(self=None, path=None):
     ds = Dataset(path).create(force=True)
     ds.addurls(self.json_file,
                "{url}",
                "{_url0}/{_url_filename}",
                result_renderer='disabled')
     for fname in ["a.dat", "b.dat", "c.dat"]:
         ok_exists(op.join(ds.path, "udir", fname))
Example #15
def test_addurls_nonannex_repo(path=None):
    ds = Dataset(path).create(force=True, annex=False)
    with assert_raises(IncompleteResultsError) as raised:
        ds.addurls("dummy_arg0",
                   "dummy_arg1",
                   "dummy_arg2",
                   result_renderer='disabled')
    assert_in("not an annex repo", str(raised.value))
Example #16
 def test_addurls_dropped_urls(self=None, path=None):
     ds = Dataset(path).create(force=True)
     with swallow_logs(new_level=logging.WARNING) as cml:
         ds.addurls(self.json_file,
                    "",
                    "{subdir}//{name}",
                    result_renderer='disabled')
         assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                      str(cml.out))
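A sketch of the row dropping the warning above reports: rows whose URL formats to an empty string are skipped rather than treated as errors (here the URL format is "", so every row is dropped):

    rows = [{"url": "", "name": "a"}, {"url": "", "name": "b"}]
    kept = [r for r in rows if r["url"]]
    print("Dropped %d row(s) that had an empty URL" % (len(rows) - len(kept)))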
Example #17
 def check_addurls_stdin_input(self, input_text, input_type, path):
     ds = Dataset(path).create(force=True)
     with patch("sys.stdin", new=StringIO(input_text)):
         ds.addurls("-",
                    "{url}",
                    "{name}",
                    input_type=input_type,
                    result_renderer='disabled')
     for fname in ["a", "b", "c"]:
         ok_exists(op.join(ds.path, fname))
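Hypothetical input_text values for this check: the same rows serialized once as JSON and once as CSV, matching the two input_type values addurls accepts for stdin input:

    import csv
    import io
    import json

    rows = [{"url": "http://example.com/%s.dat" % n, "name": n} for n in "abc"]
    json_text = json.dumps(rows)
    buf = io.StringIO()
    writer = csv.DictWriter(buf, fieldnames=["url", "name"])
    writer.writeheader()
    writer.writerows(rows)
    csv_text = buf.getvalue()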
Example #18
 def test_addurls_invalid_input(self=None, path=None):
     ds = Dataset(path).create(force=True)
     in_file = op.join(path, "in")
     for in_type in au.INPUT_TYPES:
         with assert_raises(IncompleteResultsError) as exc:
             ds.addurls(in_file,
                        "{url}",
                        "{name}",
                        input_type=in_type,
                        result_renderer='disabled')
         assert_in("Failed to read", str(exc.value))
Example #19
    def test_addurls_subdataset(self=None, path=None):
        ds = Dataset(path).create(force=True)

        for save in True, False:
            label = "save" if save else "nosave"
            with swallow_outputs() as cmo:
                ds.addurls(self.json_file,
                           "{url}",
                           "{subdir}-" + label + "//{name}",
                           save=save,
                           cfg_proc=["yoda"])
                # The custom result renderer transforms the subdataset
                # action=create results into something more informative than
                # "create(ok): . (dataset)"...
                assert_in("create(ok): foo-{} (dataset)".format(label),
                          cmo.out)
                # ... and that doesn't lose the standard summary.
                assert_in("create (ok: 2)", cmo.out)

            subdirs = [
                op.join(ds.path, "{}-{}".format(d, label))
                for d in ["foo", "bar"]
            ]
            subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

            for subds, fnames in subdir_files.items():
                for fname in fnames:
                    ok_exists(op.join(subds, fname))
                # cfg_proc was applied to generated subdatasets.
                ok_exists(op.join(subds, "code"))
            if save:
                assert_repo_status(path)
            else:
                # The datasets are created but not saved (since asked not to)
                assert_repo_status(path, untracked=subdirs)
                # but the downloaded files aren't.
                for subds, fnames in subdir_files.items():
                    assert_repo_status(subds, added=fnames)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.save()
        eq_(
            set(subdatasets(dataset=ds, recursive=True,
                            result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file,
                       "{url}",
                       "{subdir}-nosave//{name}",
                       result_renderer='disabled')
            assert_in("Not creating subdataset at existing path", cml.out)
Example #20
    def test_addurls_repindex(self, path):
        ds = Dataset(path).create(force=True)

        with assert_raises(IncompleteResultsError) as raised:
            ds.addurls(self.json_file, "{url}", "{subdir}")
        assert_in("There are file name collisions", str(raised.exception))

        ds.addurls(self.json_file, "{url}", "{subdir}-{_repindex}")

        for fname in ["foo-0", "bar-0", "foo-1"]:
            ok_exists(op.join(ds.path, fname))
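A minimal sketch of the {_repindex} semantics this test checks: otherwise-colliding file names get a per-name counter in row order, so subdirs foo, bar, foo become foo-0, bar-0, foo-1:

    from collections import defaultdict

    counts = defaultdict(int)
    names = []
    for subdir in ["foo", "bar", "foo"]:
        names.append("{}-{}".format(subdir, counts[subdir]))
        counts[subdir] += 1
    print(names)  # ['foo-0', 'bar-0', 'foo-1']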
Example #21
    def test_addurls_repindex(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            with assert_raises(IncompleteResultsError) as raised:
                ds.addurls(self.json_file, "{url}", "{subdir}")
            assert_in("There are file name collisions", str(raised.exception))

            ds.addurls(self.json_file, "{url}", "{subdir}-{_repindex}")

            for fname in ["foo-0", "bar-0", "foo-1"]:
                ok_exists(fname)
Example #22
    def test_addurls_metafail(self, path):
        ds = Dataset(path).create(force=True)

        # Force failure by passing a non-existent file name to annex.
        fn = ds.repo.set_metadata_

        def set_meta(_, **kwargs):
            for i in fn("wreaking-havoc-and-such", **kwargs):
                yield i

        with chpwd(path), patch.object(ds.repo, 'set_metadata_', set_meta):
            with assert_raises(IncompleteResultsError):
                ds.addurls(self.json_file, "{url}", "{name}")
Example #23
    def test_addurls_metafail(self, path):
        ds = Dataset(path).create(force=True)

        # Force failure by passing a non-existent file name to annex.
        fn = ds.repo.set_metadata_

        def set_meta(_, **kwargs):
            for i in fn("wreaking-havoc-and-such", **kwargs):
                yield i

        with patch.object(ds.repo, 'set_metadata_', set_meta):
            with assert_raises(IncompleteResultsError):
                ds.addurls(self.json_file, "{url}", "{name}")
Example #24
 def test_addurls_from_key_invalid_format(self, path):
     ds = Dataset(path).create(force=True)
     for fmt in [
             "{name}-which-has-no-double-dash",
             # Invalid hash length.
             "MD5-s{size}--{md5sum}a",
             # Invalid hash content.
             "MD5-s{size}--" + 32 * "q"
     ]:
         with assert_raises(IncompleteResultsError):
             ds.addurls(self.json_file,
                        "{url}",
                        "{name}",
                        key=fmt,
                        exclude_autometa="*")
Example #25
 def test_addurls_deeper(self, path):
     ds = Dataset(path).create(force=True)
     ds.addurls(self.json_file, "{url}",
                "{subdir}//adir/{subdir}-again//other-ds//bdir/{name}")
     eq_(
         set(ds.subdatasets(recursive=True, result_xfm="relpaths")), {
             "foo", "bar",
             op.join("foo", "adir", "foo-again"),
             op.join("bar", "adir", "bar-again"),
             op.join("foo", "adir", "foo-again", "other-ds"),
             op.join("bar", "adir", "bar-again", "other-ds")
         })
     ok_exists(
         os.path.join(ds.path, "foo", "adir", "foo-again", "other-ds",
                      "bdir", "a"))
Example #26
    def test_addurls_repindex(self=None, path=None):
        ds = Dataset(path).create(force=True)

        with assert_raises(IncompleteResultsError) as raised:
            ds.addurls(self.json_file,
                       "{url}",
                       "{subdir}",
                       result_renderer='disabled')
        assert_in("collided", str(raised.value))

        ds.addurls(self.json_file,
                   "{url}",
                   "{subdir}-{_repindex}",
                   result_renderer='disabled')

        for fname in ["foo-0", "bar-0", "foo-1"]:
            ok_exists(op.join(ds.path, fname))
Example #27
 def test_addurls_no_rows(self, path):
     ds = Dataset(path).create(force=True)
     for fname in ["in.csv", "in.json"]:
         with swallow_logs(new_level=logging.WARNING) as cml:
             assert_in_results(ds.addurls(fname, "{url}", "{name}"),
                               action="addurls",
                               status="notneeded")
             cml.assert_logged("No rows", regex=False)
Example #28
    def test_addurls_url_special_key_fail(self, path):
        ds = Dataset(path).create(force=True)

        res1 = ds.addurls(self.json_file,
                          "{url}",
                          "{_url4}/{_url_filename}",
                          on_failure="ignore")
        assert_in("Special key", res1[0]["message"])

        data = self.data.copy()[:1]
        data[0]["url"] = urlparse(data[0]["url"]).netloc
        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            res2 = ds.addurls("-",
                              "{url}",
                              "{_url_basename}",
                              on_failure="ignore")
        assert_in("Special key", res2[0]["message"])
Example #29
    def test_addurls_subdataset(self, path):
        ds = Dataset(path).create(force=True)

        for save in True, False:
            label = "save" if save else "nosave"
            ds.addurls(self.json_file,
                       "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save,
                       cfg_proc=["yoda"])

            subdirs = [
                op.join(ds.path, "{}-{}".format(d, label))
                for d in ["foo", "bar"]
            ]
            subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

            for subds, fnames in subdir_files.items():
                for fname in fnames:
                    ok_exists(op.join(subds, fname))
                # cfg_proc was applied to generated subdatasets.
                ok_exists(op.join(subds, "code"))
            if save:
                assert_repo_status(path)
            else:
                # The datasets are created and saved ...
                assert_repo_status(path, modified=subdirs)
                # but the downloaded files aren't.
                for subds, fnames in subdir_files.items():
                    assert_repo_status(subds, added=fnames)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.save()
        eq_(
            set(subdatasets(dataset=ds, recursive=True,
                            result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)
Example #30
    def test_drop_after(self, path):
        ds = Dataset(path).create(force=True)
        ds.repo.set_gitattributes([('a*', {'annex.largefiles': 'nothing'})])
        # make some files go to git, so we can test that we do not blow up
        # while trying to drop what is in git, not in annex
        res = ds.addurls(self.json_file, '{url}', '{name}', drop_after=True)

        assert_result_count(res, 3, action='addurl',
                            status='ok')  # a, b, c  even if a goes to git
        assert_result_count(res, 2, action='drop', status='ok')  # b, c
Example #31
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return int(
                ds.repo.repo.git.rev_list("--count", "git-annex").strip())

        n_annex_commits = get_annex_commit_counts()

        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{name}")

            filenames = ["a", "b", "c"]
            for fname in filenames:
                ok_exists(fname)

            for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                             ["foo", "bar", "foo"]):
                assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

            # Ignore this check if we're faking dates because that disables
            # batch mode.
            if not os.environ.get('DATALAD_FAKE__DATES'):
                # We should have two new commits on the git-annex branch: one for the
                # added urls and one for the added metadata.
                eq_(n_annex_commits + 2, get_annex_commit_counts())

            # Add to already existing links, overwriting.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file,
                           "{url}",
                           "{name}",
                           ifexists="overwrite")
                for fname in filenames:
                    assert_in("Removing {}".format(os.path.join(path, fname)),
                              cml.out)

            # Add to already existing links, skipping.
            assert_in_results(ds.addurls(self.json_file,
                                         "{url}",
                                         "{name}",
                                         ifexists="skip"),
                              action="addurls",
                              status="notneeded")

            # Adding to already existing links works, as long as the content is the same.
            ds.addurls(self.json_file, "{url}", "{name}")

            # But it fails if something has changed.
            ds.unlock("a")
            with open("a", "w") as ofh:
                ofh.write("changed")
            ds.save("a")

            assert_raises(IncompleteResultsError, ds.addurls, self.json_file,
                          "{url}", "{name}")
Example #32
    def test_addurls_url_on_collision_choose(self=None, path=None):
        ds = Dataset(path).create(force=True)
        data = deepcopy(self.data)
        for row in data:
            row["name"] = "a"

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")
        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_collision="error-if-different",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-", "{url}", "{name}-first", on_collision="take-first")
        ok_file_has_content(op.join(ds.path, "a-first"),
                            "a content",
                            strip=True)

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-", "{url}", "{name}-last", on_collision="take-last")
        ok_file_has_content(op.join(ds.path, "a-last"),
                            "c content",
                            strip=True)
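A sketch of the on_collision modes exercised above: among rows that map to the same file name, "take-first" keeps the earliest row and "take-last" the latest (here all three fixture rows collide on the name "a"):

    rows = [("a", "a content"), ("a", "b content"), ("a", "c content")]
    take_last = dict(rows)             # later rows overwrite earlier ones
    take_first = dict(reversed(rows))  # earlier rows win
    print(take_first["a"], "/", take_last["a"])  # a content / c content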
Example #33
    def test_addurls_version(self, path):
        ds = Dataset(path).create(force=True)

        def version_fn(url):
            if url.endswith("b.dat"):
                raise ValueError("Scheme error")
            return url + ".v1"

        with patch("datalad.plugin.addurls.get_versioned_url", version_fn):
            with swallow_logs(new_level=logging.WARNING) as cml:
                ds.addurls(self.json_file, "{url}", "{name}",
                           version_urls=True)
                assert_in("b.dat", str(cml.out))

        names = ["a", "c"]
        for fname in names:
            ok_exists(os.path.join(path, fname))

        whereis = ds.repo.whereis(names, output="full")
        for fname, info in whereis.items():
            eq_(info[ds.repo.WEB_UUID]['urls'],
                ["{}udir/{}.dat.v1".format(self.url, fname)])
Example #34
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return int(
                ds.repo.repo.git.rev_list("--count", "git-annex").strip())

        n_annex_commits = get_annex_commit_counts()

        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{name}")

            filenames = ["a", "b", "c"]
            for fname in filenames:
                ok_exists(fname)

            for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                             ["foo", "bar", "foo"]):
                assert_dict_equal(meta,
                                  {"subdir": [subdir], "name": [fname]})

            # Ignore this check if we're faking dates because that disables
            # batch mode.
            if not os.environ.get('DATALAD_FAKE__DATES'):
                # We should have two new commits on the git-annex branch: one for the
                # added urls and one for the added metadata.
                eq_(n_annex_commits + 2, get_annex_commit_counts())

            # Add to already existing links, overwriting.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{name}",
                           ifexists="overwrite")
                for fname in filenames:
                    assert_in("Removing {}".format(os.path.join(path, fname)),
                              cml.out)

            # Add to already existing links, skipping.
            assert_in_results(
                ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
                action="addurls",
                status="notneeded")

            # Adding to already existing links works, as long as the content is the same.
            ds.addurls(self.json_file, "{url}", "{name}")

            # But it fails if something has changed.
            ds.unlock("a")
            with open("a", "w") as ofh:
                ofh.write("changed")
            ds.save("a")

            assert_raises(IncompleteResultsError,
                          ds.addurls,
                          self.json_file, "{url}", "{name}")
Example #35
    def test_addurls_subdataset(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            for save in True, False:
                label = "save" if save else "nosave"
                ds.addurls(self.json_file, "{url}",
                           "{subdir}-" + label + "//{name}",
                           save=save)

                subdirs = ["{}-{}".format(d, label) for d in ["foo", "bar"]]
                subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

                for subds, fnames in subdir_files.items():
                    for fname in fnames:
                        ok_exists(op.join(subds, fname))

                if save:
                    assert_repo_status(path)
                else:
                    # The datasets are created and saved ...
                    assert_repo_status(path, modified=subdirs)
                    # but the downloaded files aren't.
                    for subds, fnames in subdir_files.items():
                        assert_repo_status(subds, added=fnames)

            # Now save the "--nosave" changes and check that we have
            # all the subdatasets.
            ds.save()
            eq_(set(subdatasets(dataset=ds, recursive=True,
                                result_xfm="relpaths")),
                {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

            # We don't try to recreate existing subdatasets.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
                assert_in("Not creating subdataset at existing path", cml.out)
Example #36
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{name}")

            filenames = ["a", "b", "c"]
            for fname in filenames:
                ok_exists(fname)

            for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                             ["foo", "bar", "foo"]):
                assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

            # Add to already existing links, overwriting.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file,
                           "{url}",
                           "{name}",
                           ifexists="overwrite")
                for fname in filenames:
                    assert_in("Removing {}".format(os.path.join(path, fname)),
                              cml.out)

            # Add to already existing links, skipping.
            assert_in_results(ds.addurls(self.json_file,
                                         "{url}",
                                         "{name}",
                                         ifexists="skip"),
                              action="addurls",
                              status="notneeded")

            # Adding to already existing links works, as long as the content is the same.
            ds.addurls(self.json_file, "{url}", "{name}")

            # But it fails if something has changed.
            ds.unlock("a")
            with open("a", "w") as ofh:
                ofh.write("changed")
            ds.add("a")

            assert_raises(IncompleteResultsError, ds.addurls, self.json_file,
                          "{url}", "{name}")
Example #37
 def test_addurls_url_filename(self, path):
     ds = Dataset(path).create(force=True)
     with chpwd(path):
         ds.addurls(self.json_file, "{url}", "{_url0}/{_url_filename}")
         for fname in ["udir/a.dat", "udir/b.dat", "udir/c.dat"]:
             ok_exists(fname)
Example #38
 def test_addurls_url_filename(self, path):
     ds = Dataset(path).create(force=True)
     ds.addurls(self.json_file, "{url}", "{_url0}/{_url_filename}")
     for fname in ["a.dat", "b.dat", "c.dat"]:
         ok_exists(op.join(ds.path, "udir", fname))
Example #39
def test_addurls_nonannex_repo(path):
    ds = Dataset(path).create(force=True, no_annex=True)
    with assert_raises(IncompleteResultsError) as raised:
        ds.addurls("dummy_arg0", "dummy_arg1", "dummy_arg2")
    assert_in("not an annex repo", str(raised.exception))
Example #40
def test_addurls_nonannex_repo(path):
    ds = Dataset(path).create(force=True, annex=False)
    with assert_raises(IncompleteResultsError) as raised:
        ds.addurls("dummy_arg0", "dummy_arg1", "dummy_arg2")
    assert_in("not an annex repo", str(raised.exception))
Example #41
 def test_addurls_dropped_urls(self, path):
     ds = Dataset(path).create(force=True)
     with chpwd(path), swallow_logs(new_level=logging.WARNING) as cml:
         ds.addurls(self.json_file, "", "{subdir}//{name}")
         assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                      str(cml.out))