def check_dropall_get(repo):
    # drop all annex content for all revisions, clean the cache, then get the
    # content for all files in master across all of its revisions
    t1w_fpath = opj(repo.path, 'sub-1', 'anat', 'sub-1_T1w.dat')
    ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
    # --force since drop would otherwise fail to verify presence once we remove archive keys... TODO
    repo._annex_custom_command([], ["git", "annex", "drop", "--all", "--force"])
    clean(dataset=repo.path)  # remove possible extracted archives
    with assert_raises(AssertionError):
        ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
    repo.get('.')
    ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
Example #2
def check_dropall_get(repo):
    # drop all annex content for all revisions, clean the cache, then get the
    # content for all files in master across all of its revisions
    t1w_fpath = opj(repo.path, 'sub-1', 'anat', 'sub-1_T1w.dat')
    ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
    # --force since drop would otherwise fail to verify presence once we remove archive keys... TODO
    repo.drop([], options=['--all', '--force'])

    clean(dataset=repo.path)  # remove possible extracted archives
    with assert_raises(AssertionError):
        ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
    try:
        repo.get('.')
    except CommandError:
        # Starting with DataLad 0.14, get() raises an exception if getting
        # any of the items fails, and the `drop --force` call above drops
        # metadata files that aren't available elsewhere.
        pass
    ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
Example #3
def test_add_archive_content(path_orig, url, repo_path):
    with chpwd(repo_path):
        # TODO we need to be able to pass a path into add_archive_content
        # We could mock it, but I mean for the API
        assert_raises(RuntimeError, add_archive_content,
                      "nonexisting.tar.gz")  # no repo yet

        repo = AnnexRepo(repo_path, create=True)
        assert_raises(ValueError, add_archive_content, "nonexisting.tar.gz")
        # we can't add a file from outside the repo ATM
        assert_raises(FileNotInRepositoryError, add_archive_content,
                      opj(path_orig, '1.tar.gz'))

        # Let's add the first archive to the repo so we can test
        with swallow_outputs():
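            # --pathdepth -1 tells `git annex addurl` to name the file after
            # the last URL component ('1.tar.gz'); -2 keeps the last two
            # (e.g. '1u/1.tar.gz')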
            repo.add_urls([opj(url, '1.tar.gz')],
                          options=["--pathdepth", "-1"])
            for s in range(1, 5):
                repo.add_urls([opj(url, '%du/1.tar.gz' % s)],
                              options=["--pathdepth", "-2"])
        repo.commit("added 1.tar.gz")

        key_1tar = repo.get_file_key(
            '1.tar.gz')  # will be used in the test later

        def d1_basic_checks():
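            # the archive content lands under '1/': files from the archive are
            # annexed and no archive caches are left behind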
            ok_(exists('1'))
            ok_file_under_git('1', '1 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd', '1d'), annexed=True)
            ok_archives_caches(repo_path, 0)

        # and by default it just does it, everything goes to annex
        repo_ = add_archive_content('1.tar.gz')
        eq_(repo.path, repo_.path)
        d1_basic_checks()

        # If run again, it should proceed just fine since the content is the
        # same, so no changes would actually be made
        add_archive_content('1.tar.gz')

        # But that other one carries an updated file, so it should fail due to
        # the overwrite
        with assert_raises(RuntimeError) as cme:
            add_archive_content(opj('1u', '1.tar.gz'), use_current_dir=True)

        # TODO: somewhat imprecise since there are two possible "already exists"
        # sources -- the caching and the overwrite check
        assert_in("already exists", str(cme.exception))
        # but should do fine if overrides are allowed
        add_archive_content(opj('1u', '1.tar.gz'),
                            existing='overwrite',
                            use_current_dir=True)
        add_archive_content(opj('2u', '1.tar.gz'),
                            existing='archive-suffix',
                            use_current_dir=True)
        add_archive_content(opj('3u', '1.tar.gz'),
                            existing='archive-suffix',
                            use_current_dir=True)
        add_archive_content(opj('4u', '1.tar.gz'),
                            existing='archive-suffix',
                            use_current_dir=True)

        # rudimentary test
        assert_equal(sorted(map(basename, glob(opj(repo_path, '1', '1*')))),
                     ['1 f-1.1.txt', '1 f-1.2.txt', '1 f-1.txt', '1 f.txt'])
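        # whereis() returns, per file, the list of remotes holding its content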
        whereis = repo.whereis(glob(opj(repo_path, '1', '1*')))
        # they all must be the same
        assert all(x == whereis[0] for x in whereis[1:])

    # and we should be able to reference it while under a subdirectory
    subdir = opj(repo_path, 'subdir')
    with chpwd(subdir, mkdir=True):
        add_archive_content(opj(pardir, '1.tar.gz'), use_current_dir=True)
        d1_basic_checks()
        # or we could keep the relative path and also demand to keep the
        # archive prefix while extracting under the original (annex root) dir
        add_archive_content(opj(pardir, '1.tar.gz'),
                            add_archive_leading_dir=True)

    with chpwd(opj(repo_path, '1')):
        d1_basic_checks()

    with chpwd(repo_path):
        # test with excludes and renames and annex options
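        # rename specs are sed-like '/match/replacement' pairs: '/ /_' turns
        # spaces into underscores, '/^1/2' renames the leading '1' to '2';
        # annex.largefiles=exclude=*.txt keeps .txt files in git rather than
        # the annex (hence annexed=False below)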
        add_archive_content('1.tar.gz',
                            exclude=['d'],
                            rename=['/ /_', '/^1/2'],
                            annex_options="-c annex.largefiles=exclude=*.txt",
                            delete=True)
        # no conflicts since new name
        ok_file_under_git('2', '1_f.txt', annexed=False)
        assert_false(exists(opj('2', 'd')))
        assert_false(exists('1.tar.gz'))  # delete was in effect

    # now test ability to extract within subdir
    with chpwd(opj(repo_path, 'd1'), mkdir=True):
        # Let's add another archive to the repo so we can test one named the
        # same way but with different content
        with swallow_outputs():
            repo.add_urls([opj(url, 'd1', '1.tar.gz')],
                          options=["--pathdepth", "-1"],
                          cwd=getpwd())  # invoke under current subdir
        repo.commit("added 1.tar.gz in d1")

        def d2_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '2 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd2', '2d'), annexed=True)
            ok_archives_caches(repo.path, 0)

        add_archive_content('1.tar.gz')
        d2_basic_checks()

    # In manual tests we ran into a situation where a file coming from a
    # dropped archive key could not be obtained in a single run.  I thought
    # this was covered by the custom remote tests, but apparently not well enough
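    # a drop/get round-trip should leave the archive only in the persistent
    # cache; the transient cache must stay empty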
    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))
    ok_archives_caches(repo.path, 1, persistent=True)
    ok_archives_caches(repo.path, 0, persistent=False)

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.drop(key_1tar,
              key=True)  # is available from the URL -- should be kosher
    repo.get(opj('1', '1 f.txt'))  # this is what used to fail

    # TODO: check if persistent archive is there for the 1.tar.gz

    # We should be able to drop everything since available online
    with swallow_outputs():
        clean(dataset=repo.path)
    repo.drop(key_1tar,
              key=True)  # is available from the URL -- should be kosher

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))  # and should be able to get it again

    # bug was that dropping didn't work since archive was dropped first
    repo.call_annex(["drop", "--all"])

    # verify that we can't drop a file if the archive key was dropped and the
    # online archive was removed or changed size! ;)
    repo.get(key_1tar, key=True)
    unlink(opj(path_orig, '1.tar.gz'))
    with assert_raises(CommandError) as e:
        repo.drop(key_1tar, key=True)
    # the assertions must run after the context exits; inside the block they
    # would never be reached once drop() raises
    assert_equal(e.exception.kwargs['stdout_json'][0]['success'], False)
    assert_result_values_cond(
        e.exception.kwargs['stdout_json'], 'note', lambda x:
        '(Use --force to override this check, or adjust numcopies.)' in x)
    assert exists(opj(repo.path, repo.get_contentlocation(key_1tar)))
Example #4
def test_clean(d=None):
    AnnexRepo(d, create=True)
    ds = Dataset(d)
    assert_status('notneeded', clean(dataset=ds))

    archives_path = ds.pathobj / Path(ARCHIVES_TEMP_DIR)
    annex_tmp_path = ds.pathobj / Path(ANNEX_TEMP_DIR)
    annex_trans_path = ds.pathobj / Path(ANNEX_TRANSFER_DIR)
    index_path = ds.repo.dot_git / Path(SEARCH_INDEX_DOTGITDIR)
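    # these are the temporary areas clean() knows how to remove: extracted
    # archives, annex tmp files, interrupted transfers, and the search index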

    # if we create some temporary archives directory, clean() should remove it
    (archives_path / 'somebogus').mkdir(parents=True)
    res = clean(dataset=ds,
                return_type='item-or-list',
                result_filter=lambda x: x['status'] == 'ok')
    assert_equal(res['path'], str(archives_path))
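    # result messages are a %-format string followed by its arguments,
    # so render them before comparing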
    assert_equal(res['message'][0] % tuple(res['message'][1:]),
                 "Removed 1 temporary archive directory: somebogus")
    assert_false(archives_path.exists())

    # relative path
    (archives_path / 'somebogus').mkdir(parents=True)
    (archives_path / 'somebogus2').mkdir(parents=True)
    with chpwd(d), swallow_outputs() as cmo:
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(
            res['message'][0] % tuple(res['message'][1:]),
            "Removed 2 temporary archive directories: somebogus, "
            "somebogus2")
        assert_false(archives_path.exists())

    # and what about git annex temporary files?
    annex_tmp_path.mkdir(parents=True)
    (annex_tmp_path / "somebogus").write_text("load")
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(annex_tmp_path))
        assert_equal(res['message'][0] % tuple(res['message'][1:]),
                     "Removed 1 temporary annex file: somebogus")
        assert_false(annex_tmp_path.exists())

    (annex_trans_path / 'somebogus').mkdir(parents=True, exist_ok=True)
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(annex_trans_path))
        assert_equal(
            res['message'][0] % tuple(res['message'][1:]),
            "Removed 1 annex temporary transfer directory: somebogus")
        assert_false(annex_trans_path.exists())

    # search index
    index_path.mkdir(parents=True)
    (index_path / "MAIN_r55n3hiyvxkdf1fi.seg, _MAIN_1.toc").write_text("noop")
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(index_path))
        assert_equal(
            res['message'][0] % tuple(res['message'][1:]),
            "Removed 1 metadata search index file: "
            "MAIN_r55n3hiyvxkdf1fi.seg, _MAIN_1.toc")
        assert_false(index_path.exists())

    # remove empty directories, too
    archives_path.mkdir(parents=True)
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(archives_path))
        assert_equal(res['message'][0] % tuple(res['message'][1:]),
                     "Removed empty temporary archive directory")
        assert_false(archives_path.exists())

    annex_tmp_path.mkdir(parents=True)
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(annex_tmp_path))
        assert_equal(res['message'][0] % tuple(res['message'][1:]),
                     "Removed empty temporary annex directory")
        assert_false(annex_tmp_path.exists())

    annex_trans_path.mkdir(parents=True)
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(annex_trans_path))
        assert_equal(res['message'][0] % tuple(res['message'][1:]),
                     "Removed empty annex temporary transfer directory")
        assert_false(annex_trans_path.exists())

    index_path.mkdir(parents=True)
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(index_path))
        assert_equal(res['message'][0] % tuple(res['message'][1:]),
                     "Removed empty metadata search index directory")
        assert_false(index_path.exists())
def test_add_archive_content(path_orig=None, url=None, repo_path=None):
    with chpwd(repo_path):
        # TODO we need to be able to pass a path into add_archive_content
        # We could mock it, but I mean for the API

        # no repo yet
        assert_raises(NoDatasetFound, add_archive_content,
                      "nonexisting.tar.gz")
        ds = Dataset(repo_path).create()
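        # on_failure='ignore' reports errors as result records instead of
        # raising, letting us inspect the 'impossible' status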
        res = ds.add_archive_content("nonexisting.tar.gz", on_failure='ignore')
        assert_in_results(res,
                          action='add-archive-content',
                          status='impossible')
        repo = ds.repo

        # we can't add a file from outside the repo ATM
        res = ds.add_archive_content(Path(path_orig) / '1.tar.gz',
                                     on_failure='ignore')
        assert_in_results(res,
                          action='add-archive-content',
                          status='impossible',
                          type="dataset",
                          message="Can not add archive outside of the dataset")

        # Let's add the first archive to the repo so we can test
        with swallow_outputs():
            repo.add_url_to_file('1.tar.gz', opj(url, '1.tar.gz'))
            for s in range(1, 5):
                repo.add_url_to_file('%du/1.tar.gz' % s,
                                     opj(url, '%du/1.tar.gz' % s))
            repo.commit("added 1.tar.gz")

        key_1tar = repo.get_file_annexinfo('1.tar.gz')[
            'key']  # will be used in the test later

        def d1_basic_checks():
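            # same sanity checks as in the older variant: extracted files are
            # annexed and no archive caches remain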
            ok_(exists('1'))
            ok_file_under_git('1', '1 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd', '1d'), annexed=True)
            ok_archives_caches(repo_path, 0)

        # and by default it just does it, everything goes to annex
        res = add_archive_content('1.tar.gz')
        assert_in_results(res, action='add-archive-content', status='ok')
        d1_basic_checks()

        # If run again, it should proceed just fine since the content is the
        # same, so no changes would actually be made
        res = add_archive_content('1.tar.gz')
        assert_in_results(res, action='add-archive-content', status='ok')

        # But that other one carries an updated file, so it should fail due to
        # the overwrite
        res = add_archive_content(Path('1u') / '1.tar.gz',
                                  use_current_dir=True,
                                  on_failure='ignore')
        assert_in_results(
            res,
            action='add-archive-content',
            status='error',
        )
        assert_in('exists, but would be overwritten by new file',
                  res[0]['message'])
        # but should do fine if overrides are allowed
        add_archive_content(Path('1u') / '1.tar.gz',
                            existing='overwrite',
                            use_current_dir=True)
        add_archive_content(Path('2u') / '1.tar.gz',
                            existing='archive-suffix',
                            use_current_dir=True)
        add_archive_content(Path('3u') / '1.tar.gz',
                            existing='archive-suffix',
                            use_current_dir=True)
        add_archive_content(Path('4u') / '1.tar.gz',
                            existing='archive-suffix',
                            use_current_dir=True)

        # rudimentary test
        assert_equal(sorted(map(basename, glob(opj(repo_path, '1', '1*')))),
                     ['1 f-1.1.txt', '1 f-1.2.txt', '1 f-1.txt', '1 f.txt'])
        whereis = repo.whereis(glob(opj(repo_path, '1', '1*')))
        # they all must be the same
        assert all(x == whereis[0] for x in whereis[1:])

    # and we should be able to reference it while under a subdirectory
    subdir = opj(repo_path, 'subdir')
    with chpwd(subdir, mkdir=True):
        add_archive_content(opj(pardir, '1.tar.gz'),
                            dataset=ds.path,
                            use_current_dir=True)
        d1_basic_checks()
        # or we could keep the relative path and also demand to keep the
        # archive prefix while extracting under the original (annex root) dir
        add_archive_content(opj(pardir, '1.tar.gz'),
                            dataset=ds.path,
                            add_archive_leading_dir=True)

    with chpwd(opj(repo_path, '1')):
        d1_basic_checks()

    with chpwd(repo_path):
        # test with excludes and renames and annex options
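        # same rename/exclude semantics as above: spaces become underscores,
        # the leading '1' becomes '2', and .txt files stay in git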
        ds.add_archive_content(
            '1.tar.gz',
            exclude=['d'],
            rename=['/ /_', '/^1/2'],
            annex_options="-c annex.largefiles=exclude=*.txt",
            delete=True)
        # no conflicts since new name
        ok_file_under_git('2', '1_f.txt', annexed=False)
        assert_false(exists(opj('2', 'd')))
        assert_false(exists('1.tar.gz'))  # delete was in effect

    # now test ability to extract within subdir
    with chpwd(opj(repo_path, 'd1'), mkdir=True):
        # Let's add another archive to the repo so we can test one named the
        # same way but with different content
        with swallow_outputs():
            repo.add_url_to_file('d1/1.tar.gz', opj(url, 'd1', '1.tar.gz'))
        repo.commit("added 1.tar.gz in d1")

        def d2_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '2 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd2', '2d'), annexed=True)
            ok_archives_caches(repo.path, 0)

        add_archive_content('1.tar.gz', dataset=ds.path)
        d2_basic_checks()

    # In manual tests we ran into a situation where a file coming from a
    # dropped archive key could not be obtained in a single run.  I thought
    # this was covered by the custom remote tests, but apparently not well enough
    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))
    ok_archives_caches(repo.path, 1, persistent=True)
    ok_archives_caches(repo.path, 0, persistent=False)

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.drop(key_1tar,
              key=True)  # is available from the URL -- should be kosher
    repo.get(opj('1', '1 f.txt'))  # this is what used to fail

    # TODO: check if persistent archive is there for the 1.tar.gz

    # We should be able to drop everything since available online
    with swallow_outputs():
        clean(dataset=ds)
    repo.drop(key_1tar,
              key=True)  # is available from the URL -- should be kosher

    ds.drop(opj('1', '1 f.txt'))  # should be all kosher
    ds.get(opj('1', '1 f.txt'))  # and should be able to get it again

    # bug was that dropping didn't work since archive was dropped first
    repo.call_annex(["drop", "--all"])

    # verify that we can't drop a file if the archive key was dropped and the
    # online archive was removed or changed size! ;)
    repo.get(key_1tar, key=True)
    unlink(opj(path_orig, '1.tar.gz'))
    with assert_raises(CommandError) as e:
        repo.drop(key_1tar, key=True)
    # the assertions must run after the context exits; inside the block they
    # would never be reached once drop() raises
    assert_equal(e.exception.kwargs['stdout_json'][0]['success'], False)
    assert_result_values_cond(
        e.exception.kwargs['stdout_json'], 'note', lambda x:
        '(Use --force to override this check, or adjust numcopies.)' in x)
    assert exists(opj(repo.path, repo.get_contentlocation(key_1tar)))