Example #1
def test_copy_file_errors(dspath1, dspath2, nondspath):
    ds1 = Dataset(dspath1)
    # nothing given
    assert_raises(ValueError, copy_file)
    # no target directory given
    assert_raises(ValueError, ds1.copy_file, 'somefile')
    # using multiple sources and --specs-from
    assert_raises(ValueError, ds1.copy_file, ['1', '2', '3'], specs_from='-')
    # trying to copy to a dir that is not in a dataset
    ds1.create()
    assert_status(
        'error',
        ds1.copy_file('somepath', target_dir=nondspath, on_failure='ignore'))
    # copy into a dataset that is not inside the reference dataset
    ds2 = Dataset(dspath2).create()
    assert_status(
        'error',
        ds1.copy_file('somepath', target_dir=dspath2, on_failure='ignore'))

    # attempt to copy from a directory, but recursion is not enabled.
    # use no reference ds to exercise a different code path
    assert_status('impossible',
                  copy_file([nondspath, dspath1], on_failure='ignore'))

    # attempt to copy a file that doesn't exist
    assert_status('impossible',
                  copy_file(['funky', dspath1], on_failure='ignore'))

    # attempt to copy a file without a destination given
    assert_raises(ValueError, copy_file, 'somepath')
    assert_status('impossible',
                  copy_file(specs_from=['somepath'], on_failure='ignore'))
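For contrast with the error cases above, a minimal happy-path sketch (the path is hypothetical, the same test-module imports are assumed, and it assumes copy_file creates a missing target_dir as needed):

def test_copy_file_happy_path(dspath):
    # a source dataset with one saved file
    ds = Dataset(dspath).create()
    (ds.pathobj / 'file1').write_text('content')
    ds.save()
    # copying within the dataset into a target directory succeeds
    assert_status('ok',
                  ds.copy_file('file1', target_dir=str(ds.pathobj / 'copies')))
    ok_exists(str(ds.pathobj / 'copies' / 'file1'))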
Example #2
def test_aggregate_with_unavailable_objects_from_subds(path, target):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)

    # now make that a subdataset of a new one, so aggregation needs to get the
    # metadata objects first:
    super = Dataset(target).create()
    super.install("base", source=base.path)
    ok_clean_git(super.path)
    clone = Dataset(opj(super.path, "base"))
    ok_clean_git(clone.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = [o for o in sorted(clone.repo.get_annexed_files(with_content_only=False)) if o.startswith(objpath)]
    eq_(len(objs), 6)
    eq_(all(clone.repo.file_has_content(objs)), False)

    # now aggregate should get those metadata objects
    super.aggregate_metadata(recursive=True, update_mode='all',
                             force_extraction=False)
    eq_(all(clone.repo.file_has_content(objs)), True)
Example #3
def test_run_from_subds_gh3551(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.create("output")
    with chpwd(op.join(ds.path, "sub")):
        assert_in_results(
            run("echo",
                inputs=[op.join(op.pardir, "sub", "input")],
                outputs=[op.join(op.pardir, "output")],
                return_type="list", result_filter=None, result_xfm=None),
            action="get",
            status="notneeded")
    assert_repo_status(ds.path)

    subds_path = op.join("output", "subds")
    ds.create(subds_path)
    with chpwd(op.join(ds.path, "sub")):
        output_dir = op.join(op.pardir, "output", "subds")
        # The command below is trying to be compatible. It could be made better
        # (e.g., actually using the input file) by someone who knows something
        # about Windows.
        assert_in_results(
            run("cd .> {}".format(op.join(output_dir, "f")),
                inputs=[op.join(op.pardir, "sub", "input")],
                outputs=[output_dir],
                return_type="list", result_filter=None, result_xfm=None),
            action="save",
            status="ok")
    assert_repo_status(ds.path)
    subds = Dataset(op.join(ds.path, subds_path))
    ok_exists(op.join(subds.path, "f"))
    if not ds.repo.is_managed_branch():  # FIXME
        # This check fails on Windows:
        # https://github.com/datalad/datalad/pull/3747/checks?check_run_id=248506560#step:8:254
        ok_(subds.repo.file_has_content("f"))
Example #4
def test_subsuperdataset_save(path):
    # Verify that when invoked without recursion save does not
    # cause querying of subdatasets of the subdataset
    # see https://github.com/datalad/datalad/issues/4523
    parent = Dataset(path).create()
    # Create 3 levels of subdatasets so we can later check operation
    # with or without --dataset being specified
    sub1 = parent.create('sub1')
    sub2 = parent.create(sub1.pathobj / 'sub2')
    sub3 = parent.create(sub2.pathobj / 'sub3')
    assert_repo_status(path)
    # now we will lobotomize sub3 so that git fails if any query is performed.
    (sub3.pathobj / '.git' / 'config').chmod(0o000)
    try:
        sub3.repo.call_git(['ls-files'], read_only=True)
        raise SkipTest
    except CommandError:
        # desired outcome
        pass
    # the calls should proceed fine since neither should care about sub3
    # default is no recursion
    parent.save('sub1')
    sub1.save('sub2')
    assert_raises(CommandError, parent.save, 'sub1', recursive=True)
    # and should not fail in the top level superdataset
    with chpwd(parent.path):
        save('sub1')
    # or in a subdataset above the problematic one
    with chpwd(sub1.path):
        save('sub2')
Example #5
def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True, jobs=5)
    # the key action is done
    assert_result_count(res,
                        1,
                        path=op.join(subsub.path, 'new'),
                        action='add',
                        status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)
Example #6
def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True)
    # the key action is done
    assert_result_count(
        res, 1, path=op.join(subsub.path, 'new'), action='add', status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)
Example #7
def test_subsuperdataset_save(path):
    # Verify that when invoked without recursion save does not
    # cause querying of subdatasets of the subdataset
    # see https://github.com/datalad/datalad/issues/4523
    parent = Dataset(path).create()
    # Create 3 levels of subdatasets so we can later check operation
    # with or without --dataset being specified
    sub1 = parent.create('sub1')
    sub2 = parent.create(sub1.pathobj / 'sub2')
    sub3 = parent.create(sub2.pathobj / 'sub3')
    assert_repo_status(path)
    # now we will lobotomize sub3 so that git fails if any query is performed.
    rmtree(str(sub3.pathobj / '.git' / 'objects'))
    # the call should proceed fine since neither should care about sub3
    # default is no recursion
    parent.save('sub1')
    sub1.save('sub2')
    assert_raises(CommandError, parent.save, 'sub1', recursive=True)
    # and should fail if we request saving while in the parent directory,
    # but without providing a dataset, since the operation would then run
    # within the pointed-to subdataset
    with chpwd(sub1.path):
        assert_raises(CommandError, save, 'sub2')
    # but should not fail in the top level superdataset
    with chpwd(parent.path):
        save('sub1')
Example #8
def test_no_leaks(path1, path2):
    ds1 = Dataset(path1).create()
    ds1.config.set('i.was.here', 'today', where='local')
    assert_in('i.was.here', ds1.config.keys())
    ds1.config.reload()
    assert_in('i.was.here', ds1.config.keys())
    # now we move into this one repo, and create another
    # make sure that no config from ds1 leaks into ds2
    with chpwd(path1):
        ds2 = Dataset(path2)
        assert_not_in('i.was.here', ds2.config.keys())
        ds2.config.reload()
        assert_not_in('i.was.here', ds2.config.keys())

        ds2.create()
        assert_not_in('i.was.here', ds2.config.keys())

        # and that we do not track the wrong files
        assert_not_in(ds1.pathobj / '.git' / 'config',
                      ds2.config._stores['git']['files'])
        assert_not_in(ds1.pathobj / '.datalad' / 'config',
                      ds2.config._stores['dataset']['files'])
        # these are the right ones
        assert_in(ds2.pathobj / '.git' / 'config',
                  ds2.config._stores['git']['files'])
        assert_in(ds2.pathobj / '.datalad' / 'config',
                  ds2.config._stores['dataset']['files'])
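The two store keys checked at the end mirror the where= scopes of this ConfigManager vintage; a brief sketch of the correspondence, assuming the pre-0.16 where= API used in this test:

# 'local' writes to <ds>/.git/config (repo-private, not versioned)
ds1.config.set('i.was.here', 'today', where='local')
# 'dataset' writes to <ds>/.datalad/config (versioned dataset content),
# so such a change still needs a save to be committed
ds1.config.set('datalad.demo.flag', 'yes', where='dataset')
ds1.save(str(ds1.pathobj / '.datalad' / 'config'))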
Example #9
def test_dirty(path):
    for mode in _dirty_modes:
        # does nothing without a dataset
        handle_dirty_dataset(None, mode)
    # placeholder, but not yet created
    ds = Dataset(path)
    # unknown mode
    assert_raises(ValueError, handle_dirty_dataset, ds, 'MADEUP')
    # not yet created is very dirty
    assert_raises(RuntimeError, handle_dirty_dataset, ds, 'fail')
    handle_dirty_dataset(ds, 'ignore')
    assert_raises(RuntimeError, handle_dirty_dataset, ds, 'save-before')
    # should yield a clean repo
    ds.create()
    orig_state = ds.repo.get_hexsha()
    _check_all_clean(ds, orig_state)
    # tainted: untracked
    with open(opj(ds.path, 'something'), 'w') as f:
        f.write('some')
    # we don't want to auto-add untracked files by saving (anymore)
    assert_raises(AssertionError, _check_auto_save, ds, orig_state)
    # tainted: staged
    ds.repo.add('something', git=True)
    orig_state = _check_auto_save(ds, orig_state)
    # tainted: submodule
    # not added to super on purpose!
    subds = ds.create('subds')
    _check_all_clean(subds, subds.repo.get_hexsha())
    assert_repo_status(ds.path)
    # subdataset must be added as a submodule!
    assert_equal(ds.subdatasets(result_xfm='relpaths'), ['subds'])
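_dirty_modes and the two _check_* helpers are defined outside this snippet; a plausible minimal reconstruction, assuming only the handle_dirty_dataset semantics exercised above (assert_not_equal as provided by the same test utilities):

_dirty_modes = ('fail', 'ignore', 'save-before')


def _check_all_clean(ds, state):
    # on a clean dataset every mode is a no-op and HEAD stays put
    assert state is not None
    for mode in _dirty_modes:
        handle_dirty_dataset(ds, mode)
        assert_equal(state, ds.repo.get_hexsha())


def _check_auto_save(ds, orig_state):
    # 'save-before' commits pending changes and advances HEAD ...
    handle_dirty_dataset(ds, 'save-before')
    state = ds.repo.get_hexsha()
    assert_not_equal(orig_state, state)
    # ... after which the dataset is clean under all modes
    _check_all_clean(ds, state)
    return state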
Example #10
def test_basic_aggregate(path):
    # TODO give datasets some more metadata to actually aggregate stuff
    base = Dataset(opj(path, 'origin')).create(force=True)
    sub = base.create('sub', force=True)
    #base.metadata(sub.path, init=dict(homepage='this'), apply2global=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    # we will first aggregate the middle dataset on its own, this will
    # serve as a smoke test for the reuse of metadata objects later on
    sub.aggregate_metadata()
    base.save()
    assert_repo_status(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    direct_meta = base.metadata(recursive=True, return_type='list')
    # lose the deepest dataset
    sub.uninstall('subsub', check=False)
    # now we should be able to re-aggregate metadata and lose nothing,
    # because we can aggregate the aggregated metadata of subsub from sub
    base.aggregate_metadata(recursive=True, update_mode='all')
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        print(d['path'], a['path'])
        assert_dict_equal(d, a)
    # now we can throw away the subdataset tree and lose no metadata
    base.uninstall('sub', recursive=True, check=False)
    assert (not sub.is_installed())
    assert_repo_status(base.path)
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        assert_dict_equal(d, a)
Example #11
def test_reaggregate_with_unavailable_objects(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # we have 3x2 metadata sets (dataset/files) under annex
    eq_(len(objs), 6)
    eq_(all(base.repo.file_has_content(objs)), True)
    # drop all object content
    base.drop(objs, check=False)
    eq_(all(base.repo.file_has_content(objs)), False)
    assert_repo_status(base.path)
    # now re-aggregate, the state hasn't changed, so the file names will
    # be the same
    base.aggregate_metadata(recursive=True,
                            update_mode='all',
                            force_extraction=True)
    eq_(all(base.repo.file_has_content(objs)), True)
    # and there are no new objects
    eq_(objs, list(sorted(base.repo.find(objpath))))
Example #12
def test_gh1426(origin_path, target_path):
    # set up a pair of repos, one the published copy of the other
    origin = Dataset(origin_path).create()
    target = mk_push_target(origin,
                            'target',
                            target_path,
                            annex=True,
                            bare=False)
    origin.push(to='target')
    assert_repo_status(origin.path)
    assert_repo_status(target.path)
    eq_(origin.repo.get_hexsha(DEFAULT_BRANCH),
        target.get_hexsha(DEFAULT_BRANCH))

    # gist of #1426 is that a newly added subdataset does not cause the
    # superdataset to get published
    origin.create('sub')
    assert_repo_status(origin.path)
    neq_(origin.repo.get_hexsha(DEFAULT_BRANCH),
         target.get_hexsha(DEFAULT_BRANCH))
    # now push
    res = origin.push(to='target')
    assert_result_count(res,
                        1,
                        status='ok',
                        type='dataset',
                        path=origin.path,
                        action='publish',
                        target='target',
                        operations=['fast-forward'])
    eq_(origin.repo.get_hexsha(DEFAULT_BRANCH),
        target.get_hexsha(DEFAULT_BRANCH))
Example #13
def test_reaggregate_with_unavailable_objects(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # we have 3x2 metadata sets (dataset/files) under annex
    eq_(len(objs), 6)
    eq_(all(base.repo.file_has_content(objs)), True)
    # drop all object content
    base.drop(objs, check=False)
    eq_(all(base.repo.file_has_content(objs)), False)
    ok_clean_git(base.path)
    # now re-aggregate, the state hasn't changed, so the file names will
    # be the same
    base.aggregate_metadata(recursive=True, update_mode='all', force_extraction=True)
    eq_(all(base.repo.file_has_content(objs)), True)
    # and there are no new objects
    eq_(
        objs,
        list(sorted(base.repo.find(objpath)))
    )
Example #14
def test_aggregate_with_unavailable_objects_from_subds(path, target):
    base = Dataset(op.join(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(op.join(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = base.create(op.join('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    base.meta_aggregate(recursive=True, into='all')
    assert_repo_status(base.path)

    # now make that a subdataset of a new one, so aggregation needs to get the
    # metadata objects first:
    super = Dataset(target).create()
    super.install("base", source=base.path)
    assert_repo_status(super.path)
    clone = Dataset(op.join(super.path, "base"))
    assert_repo_status(clone.path)
    objpath = PurePosixPath('.datalad/metadata/objects')
    objs = [
        o
        for o in sorted(clone.repo.get_annexed_files(with_content_only=False))
        if objpath in PurePosixPath(o).parents
    ]
    eq_(len(objs), 6)
    eq_(all(clone.repo.file_has_content(objs)), False)

    # now aggregate should get those metadata objects
    super.meta_aggregate(recursive=True, into='all')
    eq_(all(clone.repo.file_has_content(objs)), True)
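The PurePosixPath.parents membership test above is sturdier than the plain startswith filter used in the older variant of this test, because it matches whole path components; a self-contained illustration:

from pathlib import PurePosixPath

objpath = PurePosixPath('.datalad/metadata/objects')
# component-wise containment: True for files below the objects tree ...
assert objpath in PurePosixPath('.datalad/metadata/objects/ab/cd').parents
# ... but False for a sibling whose name merely shares the string prefix,
# which a startswith() check would wrongly accept
assert objpath not in PurePosixPath('.datalad/metadata/objects-old/x').parents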
Example #15
def test_aggregate_with_unavailable_objects_from_subds(path, target):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)

    # now make that a subdataset of a new one, so aggregation needs to get the
    # metadata objects first:
    super = Dataset(target).create()
    super.install("base", source=base.path)
    ok_clean_git(super.path)
    clone = Dataset(opj(super.path, "base"))
    ok_clean_git(clone.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = [
        o
        for o in sorted(clone.repo.get_annexed_files(with_content_only=False))
        if o.startswith(objpath)
    ]
    eq_(len(objs), 6)
    eq_(all(clone.repo.file_has_content(objs)), False)

    # now aggregate should get those metadata objects
    super.aggregate_metadata(recursive=True,
                             update_mode='all',
                             force_extraction=False)
    eq_(all(clone.repo.file_has_content(objs)), True)
Example #16
def test_reaggregate_with_unavailable_objects(path):
    base = Dataset(op.join(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(op.join(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = base.create(op.join('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    # first a quick check that an unsupported 'into' mode causes an exception
    assert_raises(ValueError,
                  base.meta_aggregate,
                  recursive=True,
                  into='spaceship')
    # now for real
    base.meta_aggregate(recursive=True, into='all')
    assert_repo_status(base.path)
    objpath = op.join('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # we have 3x2 metadata sets (dataset/files) under annex
    eq_(len(objs), 6)
    eq_(all(base.repo.file_has_content(objs)), True)
    # drop all object content
    base.drop(objs, check=False)
    eq_(all(base.repo.file_has_content(objs)), False)
    assert_repo_status(base.path)
    # now re-aggregate, the state hasn't changed, so the file names will
    # be the same
    base.meta_aggregate(recursive=True, into='all', force='fromscratch')
    eq_(all(base.repo.file_has_content(objs)), True)
    # and there are no new objects
    eq_(objs, list(sorted(base.repo.find(objpath))))
Example #17
def test_publish_aggregated(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    base.create('sub', force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)

    # create sibling and publish to it
    spath = opj(path, 'remote')
    base.create_sibling(
        name="local_target",
        sshurl="ssh://localhost",
        target_dir=spath)
    base.publish('.', to='local_target', transfer_data='all')
    remote = Dataset(spath)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # all object files are present in both datasets
    eq_(all(base.repo.file_has_content(objs)), True)
    eq_(all(remote.repo.file_has_content(objs)), True)
    # and we can squeeze the same metadata out
    eq_(
        [{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
         for i in base.metadata('sub')],
        [{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
         for i in remote.metadata('sub')],
    )
Example #18
def test_basic_aggregate(path):
    # TODO give datasets some more metadata to actually aggregate stuff
    base = Dataset(opj(path, 'origin')).create(force=True)
    sub = base.create('sub', force=True)
    #base.metadata(sub.path, init=dict(homepage='this'), apply2global=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    # we will first aggregate the middle dataset on its own, this will
    # serve as a smoke test for the reuse of metadata objects later on
    sub.aggregate_metadata()
    base.save()
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    direct_meta = base.metadata(recursive=True, return_type='list')
    # lose the deepest dataset
    sub.uninstall('subsub', check=False)
    # now we should be able to re-aggregate metadata and lose nothing,
    # because we can aggregate the aggregated metadata of subsub from sub
    base.aggregate_metadata(recursive=True, update_mode='all')
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        print(d['path'], a['path'])
        assert_dict_equal(d, a)
    # now we can throw away the subdataset tree and lose no metadata
    base.uninstall('sub', recursive=True, check=False)
    assert(not sub.is_installed())
    ok_clean_git(base.path)
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        assert_dict_equal(d, a)
Example #19
def test_dirty(path):
    for mode in _dirty_modes:
        # does nothing without a dataset
        handle_dirty_dataset(None, mode)
    # placeholder, but not yet created
    ds = Dataset(path)
    # unknown mode
    assert_raises(ValueError, handle_dirty_dataset, ds, 'MADEUP')
    # not yet created is very dirty
    assert_raises(RuntimeError, handle_dirty_dataset, ds, 'fail')
    handle_dirty_dataset(ds, 'ignore')
    assert_raises(RuntimeError, handle_dirty_dataset, ds, 'save-before')
    # should yield a clean repo
    ds.create()
    orig_state = ds.repo.get_hexsha()
    _check_all_clean(ds, orig_state)
    # tainted: untracked
    with open(opj(ds.path, 'something'), 'w') as f:
        f.write('some')
    # we don't want to auto-add untracked files by saving (anymore)
    assert_raises(AssertionError, _check_auto_save, ds, orig_state)
    # tainted: staged
    ds.repo.add('something', git=True)
    orig_state = _check_auto_save(ds, orig_state)
    # tainted: submodule
    # not added to super on purpose!
    subds = ds.create('subds')
    _check_all_clean(subds, subds.repo.get_hexsha())
    ok_clean_git(ds.path)
    # subdataset must be added as a submodule!
    assert_equal(ds.subdatasets(result_xfm='relpaths'), ['subds'])
Example #20
def test_ls_uninstalled(path):
    ds = Dataset(path)
    ds.create()
    ds.create('sub')
    ds.uninstall('sub', check=False)
    with swallow_outputs() as cmo:
        ls([path], recursive=True)
        assert_in('not installed', cmo.out)
Example #22
def test_gh2927(path, linkpath):
    if has_symlink_capability():
        # make it more complicated by default
        Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath
    ds = Dataset(path).create()
    ds.create('subds_clean')
    assert_status('ok', ds.create(op.join('subds_clean', 'subds_lvl1_clean'),
                                  result_xfm=None, return_type='list'))
Example #23
def test_create_raises(path=None, outside_path=None):
    ds = Dataset(path)
    # incompatible arguments (annex only):
    assert_raises(ValueError, ds.create, annex=False, description='some')

    with open(op.join(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # non-empty without `force`:
    assert_in_results(
        ds.create(force=False, **raw),
        status='error',
        message=
        'will not create a dataset in a non-empty directory, use `--force` option to ignore'
    )
    # non-empty with `force`:
    ds.create(force=True)
    # create sub outside of super:
    assert_in_results(
        ds.create(outside_path, **raw),
        status='error',
        message=(
            'dataset containing given paths is not underneath the reference '
            'dataset %s: %s', ds, outside_path))
    obscure_ds = u"ds-" + OBSCURE_FILENAME
    # create a sub:
    ds.create(obscure_ds)
    # fail when doing it again
    assert_in_results(ds.create(obscure_ds, **raw),
                      status='error',
                      message=('collision with %s (dataset) in dataset %s',
                               str(ds.pathobj / obscure_ds), ds.path))

    # now deinstall the sub and fail trying to create a new one at the
    # same location
    ds.drop(obscure_ds, what='all', reckless='kill', recursive=True)
    assert_in(obscure_ds, ds.subdatasets(state='absent',
                                         result_xfm='relpaths'))
    # and now it should also fail to create in place or underneath it
    assert_in_results(ds.create(obscure_ds, **raw),
                      status='error',
                      message=('collision with %s (dataset) in dataset %s',
                               str(ds.pathobj / obscure_ds), ds.path))
    assert_in_results(ds.create(op.join(obscure_ds, 'subsub'), **raw),
                      status='error',
                      message=('collision with %s (dataset) in dataset %s',
                               str(ds.pathobj / obscure_ds), ds.path))
    os.makedirs(op.join(ds.path, 'down'))
    with open(op.join(ds.path, 'down', "someotherfile.tst"), 'w') as f:
        f.write("someother")
    ds.save()
    assert_in_results(
        ds.create('down', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s', ds.path,
                 [str(ds.pathobj / 'down' / 'someotherfile.tst')]),
    )
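raw is defined elsewhere in this test module; a plausible reconstruction of the kwargs it carries, matching how the results are asserted above (collect all results as a list of plain dicts and do not raise on 'error'):

raw = dict(return_type='list', result_filter=None, result_xfm=None,
           on_failure='ignore')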
Example #24
def _mk_submodule_annex(path, fname, fcontent):
    ca = dict(result_renderer='disabled')
    # a remote dataset with a subdataset underneath
    origds = Dataset(path).create(**ca)
    (origds.pathobj / fname).write_text(fcontent)
    # naming is weird, but a legacy artifact
    s1 = origds.create('subm 1', **ca)
    (s1.pathobj / fname).write_text(fcontent)
    s2 = origds.create('2', **ca)
    (s2.pathobj / fname).write_text(fcontent)
    origds.save(recursive=True, **ca)
    return origds
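A hypothetical caller of this fixture (the path is illustrative; same test-module imports assumed):

origds = _mk_submodule_annex('/tmp/demo-origin', 'test.dat', '123\n')
# the returned dataset has two registered subdatasets
eq_(sorted(origds.subdatasets(result_xfm='relpaths')), ['2', 'subm 1'])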
Example #25
def test_create_sub_gh3463(path):
    ds = Dataset(path)
    ds.create()

    # Test non-bound call.
    with chpwd(ds.path):
        create("subds0", dataset=".")
    assert_repo_status(ds.path)

    # Test command-line invocation directly.
    Runner(cwd=ds.path).run(["datalad", "create", "-d.", "subds1"])
    assert_repo_status(ds.path)
Example #26
def check_create_initopts_form(form, path):
    path = Path(path)

    template_dir = path / "templates"
    template_dir.mkdir()
    (template_dir / "foo").write_text("")

    forms = {"list": [f"--template={template_dir}"],
             "dict": {"template": str(template_dir)}}

    ds = Dataset(path / "ds")
    ds.create(initopts=forms[form])
    ok_exists(ds.repo.dot_git / "foo")
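A minimal driver for the check above, assuming pytest's tmp_path_factory fixture; it simply exercises both initopts forms:

def test_create_initopts_forms(tmp_path_factory):
    # pass git-init options once as a list of options, once as a dict
    for form in ("list", "dict"):
        check_create_initopts_form(form, str(tmp_path_factory.mktemp(form)))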
Example #27
def test_remove_subdataset_nomethod(path=None):
    ds = Dataset(path).create()
    ds.create('subds')
    with chpwd(path):
        # fails due to unique state
        res = remove('subds', on_failure='ignore')
        assert_in_results(res,
                          action='uninstall',
                          status='error',
                          type='dataset')
        res = remove('subds', reckless='availability', on_failure='ignore')
        assert_in_results(res, action='uninstall', status='ok', type='dataset')
        assert_in_results(res, action='remove', status='ok')
        assert_in_results(res, action='save', status='ok')
Example #28
def make_demo_hierarchy_datasets(path, tree):
    created_ds = []
    for node, items in tree.items():
        node_path = opj(path, node)
        if isinstance(items, dict):
            ds = make_demo_hierarchy_datasets(node_path, items)
            created_ds.append(ds)
    topds = Dataset(path)
    if not topds.is_installed():
        topds.create(force=True)
        # TODO this farce would not be necessary if add() could add subdatasets
        for ds in created_ds:
            _install_subds_inplace(ds=topds, path=ds.path, relativepath=relpath(ds.path, topds.path))
            ds.save()
        return topds
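A hypothetical invocation: dict values in the tree spec become nested (sub)datasets (force=True allows the directories to already carry content):

demo_tree = {
    'ds1': {
        'ds1_1': {},
    },
    'ds2': {},
}
topds = make_demo_hierarchy_datasets('/tmp/demo-hierarchy', demo_tree)
ok_(topds.is_installed())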
Example #29
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.add('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    ok_clean_git(sub.path)
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(path=sub.path))
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save -d .` saves the state change in the subdataset, but leaves any untracked
    # content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save())
    ok_clean_git(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.add('new2')
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    with chpwd(parent.path):
        assert_status(
            # notneeded to save sub, but need to save parent
            ['ok', 'notneeded'],
            # the key condition of this test is that no reference dataset is
            # given!
            save(path='sub', super_datasets=True))
    # save super must not cause untracked content to be committed!
    ok_clean_git(parent.path, untracked=['untracked'])
Example #30
def test_create_subdataset_hierarchy_from_top(path):
    # what it would look like to overlay a subdataset hierarchy onto
    # an existing directory tree
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    # we got a dataset ....
    ok_(ds.is_installed())
    # ... but it has untracked content
    ok_(ds.repo.dirty)
    subds = ds.create(u"ds-" + OBSCURE_FILENAME, force=True)
    ok_(subds.is_installed())
    ok_(subds.repo.dirty)
    subsubds = subds.create('subsub', force=True)
    ok_(subsubds.is_installed())
    ok_(subsubds.repo.dirty)
    ok_(ds.id != subds.id != subsubds.id)
    ds.save(updated=True, recursive=True)
    # 'file*' in each repo was untracked before and should remain as such
    # (we don't want a #1419 resurrection)
    ok_(ds.repo.dirty)
    ok_(subds.repo.dirty)
    ok_(subsubds.repo.dirty)
    # if we add these three, we should get clean
    ds.save([
        'file1',
        op.join(subds.path, 'file2'),
        op.join(subsubds.path, 'file3')])
    assert_repo_status(ds.path)
    ok_(ds.id != subds.id != subsubds.id)
Example #31
def test_inherit_src_candidates(lcl, storepath, url):
    lcl = Path(lcl)
    storepath = Path(storepath)
    # dataset with a subdataset
    ds1 = Dataset(lcl / 'ds1').create()
    ds1sub = ds1.create('sub')
    # a different dataset into which we install ds1, but do not touch its subds
    ds2 = Dataset(lcl / 'ds2').create()
    ds2.clone(source=ds1.path, path='mysub')

    # we give no dataset a source candidate config!
    # move all dataset into the store
    for d in (ds1, ds1sub, ds2):
        _move2store(storepath, d)

    # now we must be able to obtain all three datasets from the store
    riaclone = clone(
        'ria+{}#{}'.format(
            # store URL
            url,
            # ID of the root dataset
            ds2.id),
        lcl / 'clone',
    )
    # what happens is that the initial clone call sets a source candidate
    # config, because it sees the dataset coming from a store;
    # all obtained subdatasets get the config inherited on-clone
    datasets = riaclone.get('.', get_data=False, recursive=True, result_xfm='datasets')
    # we get two subdatasets
    eq_(len(datasets), 2)
    for ds in datasets:
        eq_(ConfigManager(dataset=ds, source='dataset-local').get(
            'datalad.get.subdataset-source-candidate-200origin'),
            'ria+%s#{id}' % url)
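_move2store is defined elsewhere in the test module; a plausible sketch of what it must do, assuming a RIA store layout keyed by dataset ID and the Runner used in other examples here:

def _move2store(storepath, d):
    # bare-clone the dataset into the store under its ID-based layout
    store_loc = str(storepath / d.id[:3] / d.id[3:])
    d.repo.call_git(['clone', '--bare', d.path, store_loc])
    d.siblings('configure', name='store', url=store_loc,
               result_renderer='disabled')
    # make the bare clone consumable via plain URLs
    Runner(cwd=store_loc).run(['git', 'update-server-info'])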
Example #32
def test_invalid_call(origin, tdir):
    ds = Dataset(origin).create()
    # no target
    assert_status('impossible', ds.push(on_failure='ignore'))
    # no dataset
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, Push.__call__)
    # dataset, but outside path
    assert_raises(IncompleteResultsError, ds.push, path=tdir)
    # given a path constraint that doesn't match anything, will cause
    # nothing to be done
    assert_status('notneeded', ds.push(path=ds.pathobj / 'nothere'))

    # unavailable subdataset
    dummy_sub = ds.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_raises(ValueError, ds.push, 'sub')

    target = mk_push_target(ds, 'target', tdir, annex=True)
    # revision that doesn't exist
    assert_raises(ValueError,
                  ds.push,
                  to='target',
                  since='09320957509720437523')
Example #34
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
Example #35
def test_push_subds_no_recursion(src_path, dst_top, dst_sub, dst_subsub):
    # dataset with one submodule and one subsubmodule
    top = Dataset(src_path).create()
    sub = top.create('sub m')
    test_file = sub.pathobj / 'subdir' / 'test_file'
    test_file.parent.mkdir()
    test_file.write_text('some')
    subsub = sub.create(sub.pathobj / 'subdir' / 'subsub m')
    top.save(recursive=True)
    assert_repo_status(top.path)
    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)
    # now publish, but NO recursion, instead give the parent dir of
    # both a subdataset and a file in the middle subdataset
    res = top.push(
        to='target',
        # give relative to top dataset to elevate the difficulty a little
        path=str(test_file.relative_to(top.pathobj).parent))
    assert_status('ok', res)
    assert_in_results(res, action='publish', type='dataset', path=top.path)
    assert_in_results(res, action='publish', type='dataset', path=sub.path)
    assert_in_results(res, action='copy', type='file', path=str(test_file))
    # the lowest-level subdataset isn't touched
    assert_not_in_results(res,
                          action='publish',
                          type='dataset',
                          path=subsub.path)
Example #36
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {
            "new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {
            "new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
Example #37
def test_aggregate_removal(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.save(recursive=True)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=subsub.path)
    # check that we only have object files that are listed in agginfo
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # now delete the deepest subdataset to test cleanup of aggregated objects
    # in the top-level ds
    base.remove(opj('sub', 'subsub'), check=False)
    # now aggregation has to detect that subsub is not simply missing, but gone
    # for good
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    # internally consistent state
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # info on subsub was removed at all levels
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 2)
    res = sub.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 1)
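_get_contained_objs and _get_referenced_objs live next to these tests; plausible reconstructions, assuming the .datalad/metadata/objects layout used throughout and the aggregate-info fields queried via metadata(get_aggregates=True):

def _get_contained_objs(ds):
    # tracked files below the metadata object store
    root = opj('.datalad', 'metadata', 'objects', '')
    return set(f for f in ds.repo.get_indexed_files() if f.startswith(root))


def _get_referenced_objs(ds):
    # object files referenced by the aggregated-metadata records
    return set(
        relpath(r[f], start=ds.path)
        for r in ds.metadata(get_aggregates=True)
        for f in ('content_info', 'dataset_info'))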
Example #38
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = opj(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    ok_clean_git(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.run(rerun=True)
    ok_clean_git(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())
    # if I give another command, it will be ignored
    with chpwd(nodspath), \
            swallow_logs(new_level=logging.WARNING) as cml, \
            swallow_outputs():
        ds.run('30BANG3934', rerun=True)
        cml.assert_logged("Ignoring provided command in --rerun mode", level="WARNING")
    ok_clean_git(ds.path)
    eq_('xxx\n', open(probe_path).read())
Example #39
def test_aggregate_removal(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.add('.', recursive=True)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=subsub.path)
    # check that we only have object files that are listed in agginfo
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # now delete the deepest subdataset to test cleanup of aggregated objects
    # in the top-level ds
    base.remove(opj('sub', 'subsub'), check=False)
    # now aggregation has to detect that subsub is not simply missing, but gone
    # for good
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    # internally consistent state
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # info on subsub was removed at all levels
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 2)
    res = sub.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 1)
Example #40
def test_partial_aggregation(path):
    ds = Dataset(path).create(force=True)
    sub1 = ds.create('sub1', force=True)
    sub2 = ds.create('sub2', force=True)
    ds.add('.', recursive=True)
    ds.aggregate_metadata(recursive=True)
    # baseline, recursive aggregation gets us something for all three datasets
    res = ds.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    # now let's do partial aggregation from just one subdataset
    # we should not lose information on the other datasets,
    # as this would be a problem any time anything in a dataset
    # subtree is missing: not installed, too expensive to reaggregate, ...
    ds.aggregate_metadata(path='sub1', incremental=True)
    res = ds.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=sub2.path)
    # from-scratch aggregation kills datasets that were not listed
    ds.aggregate_metadata(path='sub1', incremental=False)
    res = ds.metadata(get_aggregates=True)
    assert_result_count(res, 2)
    assert_result_count(res, 0, path=sub2.path)
    # now re-aggregate in full
    ds.aggregate_metadata(recursive=True)
    # make change in sub1
    sub1.unlock('here')
    with open(opj(sub1.path, 'here'), 'w') as f:
        f.write('fresh')
    ds.save(recursive=True)
    ok_clean_git(path)
Example #41
def test_aggregate_query(path, randompath):
    ds = Dataset(path).create(force=True)
    # no magic change to actual dataset metadata due to presence of
    # aggregated metadata
    res = ds.meta_dump(reporton='datasets', on_failure='ignore')
    assert_result_count(res, 0)
    # but we can now ask for metadata of stuff that is unknown on disk
    res = ds.meta_dump(op.join('sub', 'deep', 'some'), reporton='datasets')
    assert_result_count(res, 1)
    eq_({'homepage': 'http://top.example.com'}, res[0]['metadata'])
    sub = ds.create('sub', force=True)
    # when no reference dataset is given there is NO magic discovery of the
    # relevant dataset
    with chpwd(randompath):
        assert_raises(ValueError,
                      meta_dump,
                      op.join(path, 'sub', 'deep', 'some'),
                      reporton='datasets')
    # but inside a dataset things work
    with chpwd(ds.path):
        res = meta_dump(op.join(path, 'sub', 'deep', 'some'),
                        reporton='datasets')
        assert_result_count(res, 1)
        # the metadata in the discovered top dataset is returned, not the
        # metadata in the subdataset
        eq_({'homepage': 'http://top.example.com'}, res[0]['metadata'])
    # when a reference dataset is given, it will be used as the metadata
    # provider
    res = sub.meta_dump(op.join('deep', 'some'), reporton='datasets')
    assert_result_count(res, 1)
    eq_({'homepage': 'http://sub.example.com'}, res[0]['metadata'])
Example #42
def make_demo_hierarchy_datasets(path, tree, parent=None):
    if parent is None:
        parent = Dataset(path).create(force=True)
    for node, items in tree.items():
        if isinstance(items, dict):
            node_path = opj(path, node)
            nodeds = parent.create(node_path, force=True)
            make_demo_hierarchy_datasets(node_path, items, parent=nodeds)
    return parent
Example #43
def test_create_sub(path):

    ds = Dataset(path)
    ds.create()

    # 1. create sub and add to super:
    subds = ds.create(op.join("some", "what", "deeper"))
    ok_(isinstance(subds, Dataset))
    ok_(subds.is_installed())
    assert_repo_status(subds.path, annex=True)
    assert_in(
        'submodule.some/what/deeper.datalad-id={}'.format(
            subds.id),
        ds.repo._git_custom_command(
            '',
            ['git', 'config', '--file', '.gitmodules', '--list'])[0]
    )

    # subdataset is known to superdataset:
    assert_in(op.join("some", "what", "deeper"),
              ds.subdatasets(result_xfm='relpaths'))
    # and was committed:
    assert_repo_status(ds.path)

    # subds finds superdataset
    ok_(subds.get_superdataset() == ds)

    # 2. create sub without adding to super:
    subds2 = Dataset(op.join(path, "someother")).create()
    ok_(isinstance(subds2, Dataset))
    ok_(subds2.is_installed())
    assert_repo_status(subds2.path, annex=True)

    # unknown to superdataset:
    assert_not_in("someother", ds.subdatasets(result_xfm='relpaths'))

    # 3. create sub via super:
    subds3 = ds.create("third", no_annex=True)
    ok_(isinstance(subds3, Dataset))
    ok_(subds3.is_installed())
    assert_repo_status(subds3.path, annex=False)
    assert_in("third", ds.subdatasets(result_xfm='relpaths'))
Example #44
def test_create(path):
    ds = Dataset(path)
    ds.create(
        description="funny",
        # custom git init option
        initopts=dict(shared='world'))
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git', 'annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'),
        ds.id)
Example #45
def test_update_strategy(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    # we start clean
    for ds in base, sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the base dataset only, nothing below changes
    base.aggregate_metadata()
    eq_(len(_get_contained_objs(base)), 2)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the entire tree, but by default this only updates
    # the top-level dataset with all objects; none of the leaf
    # or intermediate datasets gets touched
    base.aggregate_metadata(recursive=True)
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_referenced_objs(base)), 6)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    # it is impossible to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status(
            'impossible',
            ds.metadata(get_aggregates=True, on_failure='ignore'))
    # get the full metadata report
    target_meta = base.metadata(return_type='list')

    # now redo full aggregation, this time updating all
    # (intermediate) datasets
    base.aggregate_metadata(recursive=True, update_mode='all')
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_contained_objs(sub)), 4)
    eq_(len(_get_contained_objs(subsub)), 2)
    # it is now OK to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status(
            'ok',
            ds.metadata(get_aggregates=True, on_failure='ignore'))

    # all of that has no impact on the reported metadata
    eq_(target_meta, base.metadata(return_type='list'))
Example #46
def test_bf1886(path):
    parent = Dataset(path).create()
    parent.create('sub')
    assert_repo_status(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', op.join(parent.path, 'down'))
    parent.save('down')
    assert_repo_status(parent.path)
    # now symlink pointing up
    os.makedirs(op.join(parent.path, 'subdir', 'subsubdir'))
    os.symlink(op.join(op.pardir, 'sub'), op.join(parent.path, 'subdir', 'up'))
    parent.save(op.join('subdir', 'up'))
    # 'all' to avoid the empty dir being listed
    assert_repo_status(parent.path, untracked_mode='all')
    # now symlink pointing 2xup, as in #1886
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.save(op.join('subdir', 'subsubdir', 'upup'))
    assert_repo_status(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    create(op.join(parent.path, 'sub2'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub2'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.save(['sub2', op.join('subdir', 'subsubdir', 'upup2')])
    assert_repo_status(parent.path)
    # full replication of #1886: the same as above, but done from within a
    # subdirectory and with no reference dataset
    create(op.join(parent.path, 'sub3'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub3'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(op.join(parent.path, 'subdir', 'subsubdir')):
        save([op.join(parent.path, 'sub3'),
              op.join(parent.path, 'subdir', 'subsubdir', 'upup3')])
    assert_repo_status(parent.path)
Example #47
def test_aggregate_query(path):
    ds = Dataset(path).create(force=True)
    # no magic change to actual dataset metadata due to presence of
    # aggregated metadata
    res = ds.metadata(reporton='datasets', on_failure='ignore')
    assert_result_count(res, 1)
    assert_not_in('metadata', res[0])
    # but we can now ask for metadata of stuff that is unknown on disk
    res = ds.metadata(opj('sub', 'deep', 'some'), reporton='datasets')
    assert_result_count(res, 1)
    eq_({'homepage': 'http://top.example.com'}, res[0]['metadata'])
    # when no reference dataset is given the command will report the
    # aggregated metadata as it is recorded in the dataset that is the
    # closest parent on disk
    ds.create('sub', force=True)
    res = metadata(opj(path, 'sub', 'deep', 'some'), reporton='datasets')
    assert_result_count(res, 1)
    eq_({'homepage': 'http://sub.example.com'}, res[0]['metadata'])
    # when a reference dataset is given, it will be used as the metadata
    # provider
    res = ds.metadata(opj('sub', 'deep', 'some'), reporton='datasets')
    assert_result_count(res, 1)
    eq_({'homepage': 'http://top.example.com'}, res[0]['metadata'])
Example #48
def test_diff_helper(path):
    # make test dataset components of interesting states
    ds = Dataset.create(path, force=True)
    # detached dataset, not a submodule
    nosub = Dataset.create(opj(path, 'nosub'))
    # unmodified, proper submodule
    sub_clean = ds.create('sub_clean', force=True)
    # proper submodule, but committed modifications not committed in parent
    sub_modified = ds.create('sub_modified', force=True)
    sub_modified.add('modified')
    # proper submodule with untracked changes
    sub_dirty = ds.create('sub_dirty', force=True)
    ds.add(['clean', 'modified'])
    ds.unlock('modified')
    with open(opj(ds.path, 'modified'), 'w') as f:
        f.write('modified_content')
    file_mod = opj(ds.path, 'modified')
    # standard `git diff` no special args, reports modified, but not untracked
    res = list(_parse_git_diff(ds.path))
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=file_mod)
    assert_result_count(res, 1, path=sub_modified.path)
    assert_result_count(res, 1, path=sub_dirty.path)
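    # The semantics asserted above match plain `git diff`: content
    # modifications and changed/dirty submodules are reported, untracked
    # files are not. A rough standalone cross-check (a sketch, not the
    # helper's implementation):
    #
    #   import subprocess
    #   out = subprocess.check_output(
    #       ['git', 'diff', '--name-only'], cwd=ds.path, text=True)
    #   # 'modified', 'sub_modified', 'sub_dirty' appear; no untracked paths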
Example #49
0
def test_diff_recursive(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1, action='diff', state='added', path=sub.path, type='dataset')
    # now recursive
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(res, 1, action='diff', state='added', path=opj(sub.path, '.datalad', 'config'))

    # now we add a file to just the parent
    create_tree(ds.path, {'onefile': 'tobeadded', 'sub': {'twofile': 'tobeadded'}})
    res = ds.diff(recursive=True, report_untracked='all')
    assert_result_count(res, 3)
    assert_result_count(res, 1, action='diff', state='untracked', path=opj(ds.path, 'onefile'), type='file')
    assert_result_count(res, 1, action='diff', state='modified', path=sub.path, type='dataset')
    assert_result_count(res, 1, action='diff', state='untracked', path=opj(sub.path, 'twofile'), type='file')
    # save sub
    sub.add('.')
    # save sub in parent
    ds.save()
    # save addition in parent
    ds.add('.')
    ok_clean_git(ds.path)
    # look at the last change, only one file was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    assert_result_count(res, 1, action='diff', state='added', path=opj(ds.path, 'onefile'), type='file')

    # now the exact same thing with recursion; the result must not differ
    # from the call above
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    # last change in parent
    assert_result_count(res, 1, action='diff', state='added', path=opj(ds.path, 'onefile'), type='file')

    # one further back brings in the modified subdataset, and the added file within it
    res = ds.diff(recursive=True, revision='HEAD~2..HEAD')
    assert_result_count(res, 3)
    assert_result_count(res, 1, action='diff', state='added', path=opj(ds.path, 'onefile'), type='file')
    assert_result_count(res, 1, action='diff', state='added', path=opj(sub.path, 'twofile'), type='file')
    assert_result_count(res, 1, action='diff', state='modified', path=sub.path, type='dataset')
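    # Sketch of the range semantics relied on above: 'HEAD~1..HEAD' covers
    # only the parent's last commit, while 'HEAD~2..HEAD' also spans the
    # commit that updated the subdataset record, so recursion can report the
    # changes inside the subdataset as well.
    #
    #   for rev in ('HEAD~1..HEAD', 'HEAD~2..HEAD'):
    #       print(rev, len(ds.diff(recursive=True, revision=rev)))
    #   # -> 1 result, then 3 results, matching the assertions above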
Example #50
0
def test_subds_path(path):
    # a dataset with a subdataset with a file, all neatly tracked
    ds = Dataset(path).create()
    subds = ds.create('sub')
    assert_repo_status(path)
    with (subds.pathobj / 'some.txt').open('w') as f:
        f.write(u'test')
    ds.save(recursive=True)
    assert_repo_status(path)

    # querying the toplevel dataset repo for a path inside a subdataset
    # should report the subdataset record in the dataset
    # (unlike `git status`, which is silent for subdataset paths),
    # but definitely not report the subdataset as deleted
    # https://github.com/datalad/datalad-revolution/issues/17
    stat = ds.repo.status(paths=[op.join('sub', 'some.txt')])
    assert_equal(list(stat.keys()), [subds.repo.pathobj])
    assert_equal(stat[subds.repo.pathobj]['state'], 'clean')
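    # The contrast drawn above, as a sketch: plain Git stays silent for a
    # path inside a submodule, while the repo status reports the subdataset
    # record.
    #
    #   import subprocess
    #   subprocess.check_output(
    #       ['git', 'status', '--porcelain', '--', 'sub/some.txt'],
    #       cwd=ds.path)                                  # -> empty output
    #   ds.repo.status(paths=[op.join('sub', 'some.txt')])  # -> sub: clean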
Example #51
0
def test_nested_create(path):
    # to document a more organic usage pattern
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    lvl2relpath = op.join('lvl1', 'lvl2')
    lvl2path = op.join(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(op.join(ds.path, 'lvl1', 'empty'))
    with open(op.join(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.save())
    # Empty directories are filtered out.
    assert_repo_status(ds.path, untracked=[])
    # later create subdataset in a fresh dir
    # WINDOWS FAILURE IS NEXT LINE
    subds1 = ds.create(op.join('lvl1', 'subds'))
    assert_repo_status(ds.path, untracked=[])
    eq_(ds.subdatasets(result_xfm='relpaths'), [op.join('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(op.join('lvl1', 'empty'))
    assert_repo_status(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message=(
            'collision with content in parent dataset at %s: %s',
            ds.path, [op.join(lvl2path, 'file')]))
    # even with force, because doing this properly would require complicated
    # surgery
    # MIH disable shaky test till proper dedicated upfront check is in-place in `create`
    # gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo._run_annex_command('unannex', annex_options=[op.join(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself, but only on v5
    ds.repo.commit()
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use `force` option to ignore')
    # XXX even force doesn't help, because (I assume) GitPython doesn't update
    # its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
Example #52
0
def test_diff_rsync_syntax(path):
    # three nested datasets
    ds = Dataset(path).create()
    subds = ds.create('sub')
    subsubds = subds.create('deep')
    justtop = ds.diff(fr=PRE_INIT_COMMIT_SHA, path='sub')
    # we only get a single result, the subdataset in question
    assert_result_count(justtop, 1)
    assert_result_count(justtop, 1, type='dataset', path=subds.path)
    # now with "peak inside the dataset" syntax
    inside = ds.diff(fr=PRE_INIT_COMMIT_SHA, path='sub' + os.sep)
    # we get both subdatasets, but nothing else inside the nested one
    assert_result_count(inside, 2, type='dataset')
    assert_result_count(inside, 1, type='dataset', path=subds.path)
    assert_result_count(inside, 1, type='dataset', path=subsubds.path)
    assert_result_count(inside, 0, type='file', parentds=subsubds.path)
    # just for completeness, we get more when going full recursive
    rec = ds.diff(fr=PRE_INIT_COMMIT_SHA, recursive=True, path='sub' + os.sep)
    assert(len(inside) < len(rec))
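    # The "rsync syntax" exercised above, summarized:
    #
    #   path='sub'              -> report on the subdataset node itself only
    #   path='sub' + os.sep     -> additionally peek one level inside it
    #   ... plus recursive=True -> report everything underneath as well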
Example #53
0
def test_save(path):

    ds = Dataset(path)

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds.save("add a new file", all_changes=False)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save("modified new_file.tst", all_changes=True)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(opj(path, fn), "w") as f:
            f.write(fn)

    ds.add([opj(path, f) for f in files])
    # superfluous call to save (add saved it already), should not fail
    # but report that nothing was saved
    assert_false(ds.save("set of new files"))
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(opj(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.add('.')
    ok_clean_git(subds.path, annex=isinstance(ds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # ensure modified subds is committed
    ds.save(all_changes=True)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
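    # Division of labor illustrated above (a sketch of the behavior as
    # asserted, not an API reference): `add` records new or changed content
    # (and saves it), while `save` commits an already modified state; with
    # all_changes=True it also captures modified subdatasets registered in
    # the parent.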
Example #54
0
def test_bf1886(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'), opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = create(opj(parent.path, 'sub2'))
    os.symlink(
        opj(pardir, pardir, 'sub2'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the same as above, but executed from a
    # subdir of the symlink, with no reference dataset
    sub3 = create(opj(parent.path, 'sub3'))
    os.symlink(
        opj(pardir, pardir, 'sub3'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        add([opj(parent.path, 'sub3'),
             opj(parent.path, 'subdir', 'subsubdir', 'upup3')])
    # here is where we need to disagree with the repo in #1886
    # we would not expect that `add` registers sub3 as a subdataset
    # of parent, because no reference dataset was given and the
    # command cannot decide (with the current semantics) whether
    # it should "add anything in sub3 to sub3" or "add sub3 to whatever
    # sub3 is in"
    ok_clean_git(parent.path, untracked=['sub3/'])
Example #55
0
def test_bf1886(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'), opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = create(opj(parent.path, 'sub2'))
    os.symlink(
        opj(pardir, pardir, 'sub2'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the same as above, but executed from a
    # subdir of the symlink, with no reference dataset
    sub3 = create(opj(parent.path, 'sub3'))
    os.symlink(
        opj(pardir, pardir, 'sub3'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        rev_save([opj(parent.path, 'sub3'),
                  opj(parent.path, 'subdir', 'subsubdir', 'upup3')])
    # in contrast to `add`, `rev_save` only operates on a single top-level
    # dataset; although it is not specified, it gets discovered based on the PWD
    # the logic behind that feels a bit shaky
    # consult discussion in https://github.com/datalad/datalad/issues/3230
    # if this comes up as an issue at some point
    ok_clean_git(parent.path)
Example #56
0
def test_gh1597(path):
    if 'APPVEYOR' in os.environ:
        # issue only happens on appveyor, Python itself implodes
        # cannot be reproduced on a real windows box
        raise SkipTest(
            'this test causes appveyor to crash, reason unknown')
    ds = Dataset(path).create()
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    # now modify low-level
    with open(op.join(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path, modified=['.gitmodules'])
    ds.save('.gitmodules')
    # must not come under annex management
    assert_not_in(
        'key',
        ds.repo.annexstatus(paths=['.gitmodules']).popitem()[1])
Example #57
0
def test_state(path):
    ds = Dataset.create(path)
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # by default we are not reporting any state info
    assert_not_in('state', res[0])
    # uninstall the subdataset
    ds.uninstall('sub')
    # normally 'gone' is reported as "absent"
    assert_false(sub.is_installed())
    assert_result_count(
        ds.subdatasets(), 1, path=sub.path, state='absent')
    # the same with the directory totally gone
    os.rmdir(sub.path)
    assert_result_count(
        ds.subdatasets(), 1, path=sub.path, state='absent')
    # putting dir back, no change
    os.makedirs(sub.path)
    assert_result_count(
        ds.subdatasets(), 1, path=sub.path, state='absent')
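    # Hypothetical follow-up (not part of the test): an 'absent' subdataset
    # can be reinstalled, after which its reported state flips accordingly.
    #
    #   ds.get('sub', get_data=False)  # or ds.install('sub')
    #   assert_result_count(
    #       ds.subdatasets(), 1, path=sub.path, state='present')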
Example #58
0
def test_get_dataset_directories(path):
    assert_raises(ValueError, get_dataset_directories, path)
    ds = Dataset(path).create()
    # ignores .git always and .datalad by default
    assert_equal(get_dataset_directories(path), [])
    assert_equal(get_dataset_directories(path, ignore_datalad=False),
                 [opj(path, '.datalad')])
    # find any directory, not just those known to git
    testdir = opj(path, 'newdir')
    os.makedirs(testdir)
    assert_equal(get_dataset_directories(path), [testdir])
    # do not find files
    with open(opj(path, 'somefile'), 'w') as f:
        f.write('some')
    assert_equal(get_dataset_directories(path), [testdir])
    # find more than one directory
    testdir2 = opj(path, 'newdir2')
    os.makedirs(testdir2)
    assert_equal(sorted(get_dataset_directories(path)), sorted([testdir, testdir2]))
    # find subdataset mount points
    subdsdir = opj(path, 'sub')
    subds = ds.create(subdsdir)
    assert_equal(sorted(get_dataset_directories(path)), sorted([testdir, testdir2, subdsdir]))
    # do not find content within subdataset dirs
    os.makedirs(opj(path, 'sub', 'deep'))
    assert_equal(sorted(get_dataset_directories(path)), sorted([testdir, testdir2, subdsdir]))
    subsubdsdir = opj(subdsdir, 'subsub')
    subds.create(subsubdsdir)
    assert_equal(sorted(get_dataset_directories(path)), sorted([testdir, testdir2, subdsdir]))
    # find nested directories
    testdir3 = opj(testdir2, 'newdir21')
    os.makedirs(testdir3)
    assert_equal(sorted(get_dataset_directories(path)), sorted([testdir, testdir2, testdir3, subdsdir]))
    # only return hits below the search path
    assert_equal(sorted(get_dataset_directories(testdir2)), sorted([testdir3]))
    # empty subdataset mount points are reported too
    ds.uninstall(subds.path, check=False, recursive=True)
    ok_(not subds.is_installed())
    ok_(os.path.exists(subds.path))
    assert_equal(sorted(get_dataset_directories(path)), sorted([testdir, testdir2, testdir3, subdsdir]))
Example #59
0
def test_partial_aggregation(path):
    ds = Dataset(path).create(force=True)
    sub1 = ds.create('sub1', force=True)
    sub2 = ds.create('sub2', force=True)
    ds.add('.', recursive=True)

    # if we aggregate given path(s) and say to recurse, we must not recurse
    # into the dataset itself and aggregate others
    ds.aggregate_metadata(path='sub1', recursive=True)
    res = ds.metadata(get_aggregates=True)
    assert_result_count(res, 1, path=ds.path)
    assert_result_count(res, 1, path=sub1.path)
    # so no metadata aggregates for sub2 yet
    assert_result_count(res, 0, path=sub2.path)

    ds.aggregate_metadata(recursive=True)
    # baseline, recursive aggregation gets us something for all three datasets
    res = ds.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    # now let's do partial aggregation from just one subdataset
    # we should not lose information on the other datasets
    # as this would be a problem any time anything in a dataset
    # subtree is missing: not installed, too expensive to reaggregate, ...
    ds.aggregate_metadata(path='sub1', incremental=True)
    res = ds.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=sub2.path)
    # from-scratch aggregation kills datasets that were not listed
    ds.aggregate_metadata(path='sub1', incremental=False)
    res = ds.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=sub2.path)
    # now reaggregate in full
    ds.aggregate_metadata(recursive=True)
    # make change in sub1
    sub1.unlock('here')
    with open(opj(sub1.path, 'here'), 'w') as f:
        f.write('fresh')
    ds.save(recursive=True)
    ok_clean_git(path)
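    # Sketch of the guarantee exercised above: partial aggregation with
    # incremental=True merges fresh results for the given path into the
    # existing aggregate records, so information about datasets that were
    # not reaggregated (here sub2) is preserved rather than lost.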
Example #60
0
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {
            "new": "wanted"}})
    sub.add('new')
    # defined state: one untracked file, and a modified (but internally clean) subdataset
    ok_clean_git(sub.path)
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(path=sub.path))
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save -d .` saves the state change in the subdataset, but leaves any untracked
    # content alone
    with chpwd(parent.path):
        assert_status('ok', parent._save())
    ok_clean_git(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {
            "new2": "wanted2"}})
    sub.add('new2')
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    with chpwd(parent.path):
        assert_status(
            # notneeded to save sub, but need to save parent
            ['ok', 'notneeded'],
            # the key condition of this test is that no reference dataset is
            # given!
            save(path='sub', super_datasets=True))
    # save super must not cause untracked content to be committed!
    ok_clean_git(parent.path, untracked=['untracked'])
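    # Sketch of the super_datasets semantics relied on above (as asserted by
    # this test, not an API reference): with no reference dataset given,
    # `save` discovers the superdataset from the current working directory,
    # saves the modified subdataset record there, and leaves untracked
    # content in the superdataset alone.
    #
    #   with chpwd(parent.path):
    #       save(path='sub', super_datasets=True)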