Example #1
def test_kill(path):
    # nested datasets with load
    ds = Dataset(path).create()
    testfile = opj(ds.path, "file.dat")
    with open(testfile, 'w') as f:
        f.write("load")
    ds.save("file.dat")
    subds = ds.create('deep1')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['deep1'])
    ok_clean_git(ds.path)

    # and we fail to remove since content can't be dropped
    res = ds.remove(on_failure='ignore')
    assert_result_count(
        res, 1,
        status='error', path=testfile)
    # The following assertion on the message relies on the actual error text.
    # We also get a second result with status 'impossible' for the dataset,
    # which we need to filter out for that assertion:
    err_result = [r for r in res if r['status'] == 'error'][0]
    assert_result_values_cond(
        [err_result], 'message',
        lambda x: "configured minimum number of copies not found" in x or
        "Could only verify the existence of 0 out of 1 necessary copies" in x
    )
    eq_(ds.remove(recursive=True, check=False, result_xfm='datasets'),
        [subds, ds])
    ok_(not exists(path))
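
# The assertions above operate on DataLad-style result records: plain dicts
# with at least 'status' and 'path' keys, plus a 'message' on failures.
# A minimal standalone sketch of the filtering the test does by hand to
# isolate the single 'error' record (the sample records are hypothetical,
# not produced by the test above):

def results_with_status(results, status):
    """Return only the result records whose 'status' matches."""
    return [r for r in results if r.get('status') == status]

sample_results = [
    {'status': 'error', 'path': '/tmp/ds/file.dat',
     'message': 'configured minimum number of copies not found'},
    {'status': 'impossible', 'path': '/tmp/ds'},
]
assert len(results_with_status(sample_results, 'error')) == 1
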
Example #2
def test_uninstall_recursive(path):
    ds = Dataset(path).create(force=True)
    subds = ds.create('deep', force=True)
    # we add one file, but we also get a result for the containing
    # dataset
    res = subds.save()
    assert_result_count(res, 1, action='add', status='ok', type='file')
    assert_result_count(res, 1, action='save', status='ok', type='dataset')
    # save all -> all clean
    ds.save(recursive=True)
    ok_clean_git(subds.path)
    ok_clean_git(ds.path)
    # now drop/remove content in the subdataset through the superdataset
    target_fname = opj('deep', 'dir', 'test')
    # sane starting point
    ok_(exists(opj(ds.path, target_fname)))
    # doesn't have the minimum number of copies for a safe drop
    res = ds.drop(target_fname, recursive=True, on_failure='ignore')
    assert_status('error', res)
    assert_result_values_cond(
        res, 'message',
        lambda x: "configured minimum number of copies not found" in x or
        "Could only verify the existence of 0 out of 1 necessary copies" in x
    )

    # this should do it
    ds.drop(target_fname, check=False, recursive=True)
    # link is dead
    lname = opj(ds.path, target_fname)
    ok_(not exists(lname))
    # entire hierarchy saved
    ok_clean_git(subds.path)
    ok_clean_git(ds.path)
    # now the same with actual handle removal;
    # content is already dropped, so no checks apply anyway
    ds.remove(target_fname, check=True, recursive=True)
    ok_(not exists(lname) and not lexists(lname))
    ok_clean_git(subds.path)
    ok_clean_git(ds.path)
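
# Why the test checks both exists() and lexists() after the final remove():
# a dropped annexed file is left behind as a dangling symlink, for which
# exists() (which follows links) is already False while lexists() is still
# True; only after the handle itself is removed are both False.  A minimal
# standalone sketch of that distinction (temporary paths, not part of the
# test above):

import os
import tempfile
from os.path import exists, lexists, join as opj

tmpdir = tempfile.mkdtemp()
link = opj(tmpdir, 'file.dat')
os.symlink(opj(tmpdir, 'no-such-content'), link)  # dangling link, like a dropped file
assert not exists(link) and lexists(link)
os.unlink(link)                                   # like the actual handle removal
assert not exists(link) and not lexists(link)
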
Example #3
def test_add_archive_content(path_orig, url, repo_path):
    direct = False  # TODO: also test in direct mode, but it takes too long ATM
    orig_pwd = getpwd()
    chpwd(repo_path)
    # TODO: we need to be able to pass a path into add_archive_content
    # We could mock it, but this is about the actual API
    assert_raises(RuntimeError, add_archive_content,
                  "nonexisting.tar.gz")  # no repo yet

    repo = AnnexRepo(repo_path, create=True, direct=direct)
    assert_raises(ValueError, add_archive_content, "nonexisting.tar.gz")
    # we can't add a file from outside the repo ATM
    assert_raises(FileNotInRepositoryError, add_archive_content,
                  opj(path_orig, '1.tar.gz'))

    # Let's add the first archive to the repo so we can test
    with swallow_outputs():
        repo.add_urls([opj(url, '1.tar.gz')], options=["--pathdepth", "-1"])
        for s in range(1, 5):
            repo.add_urls([opj(url, '%du/1.tar.gz' % s)],
                          options=["--pathdepth", "-2"])
    repo.commit("added 1.tar.gz")

    key_1tar = repo.get_file_key('1.tar.gz')  # will be used in the test later

    def d1_basic_checks():
        ok_(exists('1'))
        ok_file_under_git('1', '1 f.txt', annexed=True)
        ok_file_under_git(opj('1', 'd', '1d'), annexed=True)
        ok_archives_caches(repo_path, 0)

    # and by default it just does it, everything goes to annex
    repo_ = add_archive_content('1.tar.gz')
    eq_(repo.path, repo_.path)
    d1_basic_checks()

    # If run again, it should proceed just fine since the content is the same,
    # so no changes would really be made
    add_archive_content('1.tar.gz')

    # But that other one carries an updated file, so it should fail due to
    # the overwrite
    with assert_raises(RuntimeError) as cme:
        add_archive_content(opj('1u', '1.tar.gz'), use_current_dir=True)

    # TODO: somewhat imprecise since there are two possible "already exists"
    # sources -- the caching and the overwrite check
    assert_in("already exists", str(cme.exception))
    # but it should do fine if overwriting is allowed
    add_archive_content(opj('1u', '1.tar.gz'),
                        existing='overwrite',
                        use_current_dir=True)
    add_archive_content(opj('2u', '1.tar.gz'),
                        existing='archive-suffix',
                        use_current_dir=True)
    add_archive_content(opj('3u', '1.tar.gz'),
                        existing='archive-suffix',
                        use_current_dir=True)
    add_archive_content(opj('4u', '1.tar.gz'),
                        existing='archive-suffix',
                        use_current_dir=True)

    # rudimentary test
    assert_equal(sorted(map(basename, glob(opj(repo_path, '1', '1*')))),
                 ['1 f-1.1.txt', '1 f-1.2.txt', '1 f-1.txt', '1 f.txt'])
    whereis = repo.whereis(glob(opj(repo_path, '1', '1*')))
    # they all must be the same
    assert all(x == whereis[0] for x in whereis[1:])

    # and we should be able to reference it while under a subdirectory
    subdir = opj(repo_path, 'subdir')
    with chpwd(subdir, mkdir=True):
        add_archive_content(opj(pardir, '1.tar.gz'), use_current_dir=True)
        d1_basic_checks()
        # or we could keep the relative path and also demand to keep the archive
        # prefix while extracting under the original (annex root) dir
        add_archive_content(opj(pardir, '1.tar.gz'),
                            add_archive_leading_dir=True)
    with chpwd('1'):
        d1_basic_checks()

    # test with excludes and renames and annex options
    add_archive_content('1.tar.gz',
                        exclude=['d'],
                        rename=['/ /_', '/^1/2'],
                        annex_options="-c annex.largefiles=exclude=*.txt",
                        delete=True)
    # no conflicts since new name
    ok_file_under_git('2', '1_f.txt', annexed=False)
    assert_false(exists(opj('2', 'd')))
    assert_false(exists('1.tar.gz'))  # delete was in effect

    # now test ability to extract within subdir
    with chpwd('d1', mkdir=True):
        # Let's add an archive named the same way but with different
        # content to the repo so we can test
        with swallow_outputs():
            repo.add_urls([opj(url, 'd1', '1.tar.gz')],
                          options=["--pathdepth", "-1"],
                          cwd=getpwd())  # invoke under current subdir
        repo.commit("added 1.tar.gz in d1")

        def d2_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '2 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd2', '2d'), annexed=True)
            ok_archives_caches(repo.path, 0)

        add_archive_content('1.tar.gz')
        d2_basic_checks()

    # In manual tests we ran into a situation where a file from an archive whose
    # key had been dropped could not be obtained in a single run.  I thought this
    # was covered by the custom remote tests, but apparently not well enough
    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))
    ok_archives_caches(repo.path, 1, persistent=True)
    ok_archives_caches(repo.path, 0, persistent=False)

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.drop(key_1tar,
              key=True)  # is available from the URL -- should be kosher
    repo.get(opj('1', '1 f.txt'))  # this is what managed to not work

    # TODO: check if persistent archive is there for the 1.tar.gz

    # We should be able to drop everything since available online
    with swallow_outputs():
        clean(dataset=repo.path)
    repo.drop(key_1tar,
              key=True)  # is available from the URL -- should be kosher
    # just to avoid warnings ;)  move below whenever SkipTest is removed
    chpwd(orig_pwd)

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))  # and should be able to get it again

    # the bug was that dropping didn't work since the archive was dropped first
    repo._annex_custom_command([], ["git", "annex", "drop", "--all"])

    # verify that we can't drop a file if the archive key was dropped and the
    # online archive was removed or changed size! ;)
    repo.get(key_1tar, key=True)
    unlink(opj(path_orig, '1.tar.gz'))
    res = repo.drop(key_1tar, key=True)
    assert_equal(res['success'], False)

    assert_result_values_cond(
        [res], 'note', lambda x:
        '(Use --force to override this check, or adjust numcopies.)' in x)
    assert exists(opj(repo.path, repo.get_contentlocation(key_1tar)))
def test_add_archive_content(path_orig, url, repo_path):
    with chpwd(repo_path):
        # TODO: we need to be able to pass a path into add_archive_content
        # We could mock it, but this is about the actual API
        assert_raises(RuntimeError, add_archive_content, "nonexisting.tar.gz")  # no repo yet

        repo = AnnexRepo(repo_path, create=True)
        assert_raises(ValueError, add_archive_content, "nonexisting.tar.gz")
        # we can't add a file from outside the repo ATM
        assert_raises(FileNotInRepositoryError, add_archive_content, opj(path_orig, '1.tar.gz'))

        # Let's add the first archive to the repo so we can test
        with swallow_outputs():
            repo.add_urls([opj(url, '1.tar.gz')], options=["--pathdepth", "-1"])
            for s in range(1, 5):
                repo.add_urls([opj(url, '%du/1.tar.gz' % s)], options=["--pathdepth", "-2"])
        repo.commit("added 1.tar.gz")

        key_1tar = repo.get_file_key('1.tar.gz')  # will be used in the test later

        def d1_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '1 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd', '1d'), annexed=True)
            ok_archives_caches(repo_path, 0)

        # and by default it just does it, everything goes to annex
        repo_ = add_archive_content('1.tar.gz')
        eq_(repo.path, repo_.path)
        d1_basic_checks()

        # If run again, it should proceed just fine since the content is the
        # same, so no changes would really be made
        add_archive_content('1.tar.gz')

        # But that other one carries an updated file, so it should fail due to
        # the overwrite
        with assert_raises(RuntimeError) as cme:
            add_archive_content(opj('1u', '1.tar.gz'), use_current_dir=True)

        # TODO: somewhat imprecise since there are two possible "already exists"
        # sources -- the caching and the overwrite check
        assert_in("already exists", str(cme.exception))
        # but it should do fine if overwriting is allowed
        add_archive_content(opj('1u', '1.tar.gz'), existing='overwrite', use_current_dir=True)
        add_archive_content(opj('2u', '1.tar.gz'), existing='archive-suffix', use_current_dir=True)
        add_archive_content(opj('3u', '1.tar.gz'), existing='archive-suffix', use_current_dir=True)
        add_archive_content(opj('4u', '1.tar.gz'), existing='archive-suffix', use_current_dir=True)

        # rudimentary test
        assert_equal(sorted(map(basename, glob(opj(repo_path, '1', '1*')))),
                     ['1 f-1.1.txt', '1 f-1.2.txt', '1 f-1.txt', '1 f.txt'])
        whereis = repo.whereis(glob(opj(repo_path, '1', '1*')))
        # they all must be the same
        assert all(x == whereis[0] for x in whereis[1:])

    # and we should be able to reference it while under a subdirectory
    subdir = opj(repo_path, 'subdir')
    with chpwd(subdir, mkdir=True):
        add_archive_content(opj(pardir, '1.tar.gz'), use_current_dir=True)
        d1_basic_checks()
        # or we could keep the relative path and also demand to keep the archive
        # prefix while extracting under the original (annex root) dir
        add_archive_content(opj(pardir, '1.tar.gz'), add_archive_leading_dir=True)

    with chpwd(opj(repo_path, '1')):
        d1_basic_checks()

    with chpwd(repo_path):
        # test with excludes and renames and annex options
        add_archive_content(
            '1.tar.gz', exclude=['d'], rename=['/ /_', '/^1/2'],
            annex_options="-c annex.largefiles=exclude=*.txt",
            delete=True)
        # no conflicts since new name
        ok_file_under_git('2', '1_f.txt', annexed=False)
        assert_false(exists(opj('2', 'd')))
        assert_false(exists('1.tar.gz'))  # delete was in effect

    # now test ability to extract within subdir
    with chpwd(opj(repo_path, 'd1'), mkdir=True):
        # Let's add an archive named the same way but with different
        # content to the repo so we can test
        with swallow_outputs():
            repo.add_urls([opj(url, 'd1', '1.tar.gz')], options=["--pathdepth", "-1"],
                          cwd=getpwd())  # invoke under current subdir
        repo.commit("added 1.tar.gz in d1")

        def d2_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '2 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd2', '2d'), annexed=True)
            ok_archives_caches(repo.path, 0)

        add_archive_content('1.tar.gz')
        d2_basic_checks()

    # In manual tests we ran into a situation where a file from an archive whose
    # key had been dropped could not be obtained in a single run.  I thought this
    # was covered by the custom remote tests, but apparently not well enough
    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))
    ok_archives_caches(repo.path, 1, persistent=True)
    ok_archives_caches(repo.path, 0, persistent=False)

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.drop(key_1tar, key=True)  # is available from the URL -- should be kosher
    repo.get(opj('1', '1 f.txt'))  # this is what managed to not work

    # TODO: check if persistent archive is there for the 1.tar.gz

    # We should be able to drop everything since available online
    with swallow_outputs():
        clean(dataset=repo.path)
    repo.drop(key_1tar, key=True)  # is available from the URL -- should be kosher

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))  # and should be able to get it again

    # the bug was that dropping didn't work since the archive was dropped first
    repo._annex_custom_command([], ["git", "annex", "drop", "--all"])

    # verify that we can't drop a file if the archive key was dropped and the
    # online archive was removed or changed size! ;)
    repo.get(key_1tar, key=True)
    unlink(opj(path_orig, '1.tar.gz'))
    res = repo.drop(key_1tar, key=True)
    assert_equal(res['success'], False)

    assert_result_values_cond(
        [res], 'note',
        lambda x: '(Use --force to override this check, or adjust numcopies.)' in x
    )
    assert exists(opj(repo.path, repo.get_contentlocation(key_1tar)))
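
# The (path_orig, url, repo_path) arguments of the tests above come from test
# decorators that are not shown in this excerpt.  One plausible wiring -- an
# assumption for illustration only, not copied from the source -- would use
# DataLad's test helpers: @with_tree to lay out the archives on disk,
# @serve_path_via_http to expose that tree at a URL, and @with_tempfile to
# provide a fresh repository path:
#
#     from datalad.tests.utils import (
#         serve_path_via_http, with_tempfile, with_tree)
#
#     @with_tree(tree={'1.tar.gz': ...})   # hypothetical archive layout
#     @serve_path_via_http()
#     @with_tempfile(mkdir=True)
#     def test_add_archive_content(path_orig, url, repo_path):
#         ...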