Example 1
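The listing below comes from DataLad's test suite and is shown without its
module context. As a sketch of what it likely assumes (the exact module paths
are assumptions based on DataLad's layout in the releases that still shipped
annotate_paths; make_demo_hierarchy_datasets and demo_hierarchy are fixtures
defined alongside the test that build a small nested-dataset tree):

# Plausible module preamble for this test (hedged reconstruction, not
# verbatim from DataLad's sources).
from copy import deepcopy
from os.path import basename
from os.path import join as opj

from datalad.api import annotate_paths
from datalad.distribution.dataset import Dataset
from datalad.utils import chpwd
from datalad.tests.utils import (
    assert_result_count,
    eq_,
    ok_clean_git,
    with_tempfile,
)

# In the suite, dspath and nodspath are typically injected by stacked
# @with_tempfile(mkdir=True) decorators on the test function.
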
def test_annotate_paths(dspath, nodspath):
    # this test doesn't use the API's `remove` to avoid circularities
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)

    with chpwd(dspath):
        # with and without an explicitly given path the result is almost the
        # same inside a dataset
        without_path = annotate_paths(on_failure='ignore')
        pwd_res = annotate_paths(path='.', on_failure='ignore')
        assert_result_count(without_path, 1, type='dataset', path=dspath)
        assert_result_count(pwd_res,
                            1,
                            type='dataset',
                            path=dspath,
                            orig_request='.',
                            raw_input=True)
        # make sure going into a subdataset vs giving it as a path has no
        # structural impact
        eq_(
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request', 'refds')}
             for ap in annotate_paths(path='b', recursive=True)],
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request', 'refds')}
             for ap in annotate_paths(dataset='b', recursive=True)])
    # now do it again, pointing to the ds directly
    res = ds.annotate_paths(on_failure='ignore')
    # no request, no refds, but otherwise the same
    eq_(len(res), len(pwd_res))
    eq_({k: pwd_res[0][k] for k in pwd_res[0]
         if k in ('path', 'type', 'action', 'status')},
        {k: res[0][k] for k in res[0]
         if k not in ('refds',)})

    # will refuse a path that is not a dataset as refds
    res = annotate_paths(dataset=nodspath, on_failure='ignore')
    assert_result_count(res,
                        1,
                        status='error',
                        path=nodspath,
                        message='given reference dataset is not a dataset')

    # recursion with proper base dataset
    parentds = Dataset(opj(dspath, 'a'))
    base_res = parentds.annotate_paths(recursive=True)
    # needs to find 'aa' and the base
    assert_result_count(base_res, 2)
    assert_result_count(base_res, 2, type='dataset')
    assert_result_count(base_res,
                        1,
                        type='dataset',
                        parentds=parentds.path,
                        path=opj(parentds.path, 'aa'),
                        status='')
    # same recursion but without a base dataset
    res = annotate_paths(path=opj(dspath, 'a'), recursive=True)
    # needs to find 'aa' and 'a' again
    assert_result_count(res, 2)
    eq_(res[-1],
        {k: base_res[-1][k] for k in base_res[-1]
         if k not in ('refds',)})
    assert_result_count(
        res,
        1,
        type='dataset',
        status='',
        # it does not auto-discover parent datasets without force or a refds
        #parentds=parentds.path,
        path=parentds.path)
    # but we can force parent discovery
    res = parentds.annotate_paths(path=opj(dspath, 'a'),
                                  recursive=True,
                                  force_parentds_discovery=True)
    assert_result_count(res, 2)
    assert_result_count(res,
                        1,
                        type='dataset',
                        status='',
                        parentds=dspath,
                        path=parentds.path)

    # recursion with multiple disjoint seeds, no common base
    eq_([basename(p) for p in annotate_paths(
            path=[opj(dspath, 'a'), opj(dspath, 'b', 'bb', 'bba')],
            recursive=True,
            result_xfm='paths')],
        ['a', 'aa', 'bba', 'bbaa'])

    # recursion with partially overlapping seeds, no duplicate results
    eq_([basename(p) for p in annotate_paths(
            path=[opj(dspath, 'b'), opj(dspath, 'b', 'bb', 'bba')],
            recursive=True,
            result_xfm='paths')],
        ['b', 'ba', 'bb', 'bba', 'bbaa'])

    # get straight from a file
    fpath = opj('a', 'aa', 'file_aa')
    res = ds.annotate_paths(fpath)
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        orig_request=fpath,
                        raw_input=True,
                        type='file',
                        path=opj(ds.path, fpath),
                        parentds=opj(ds.path, 'a', 'aa'),
                        status='')
    # now drop it
    dropres = ds.drop(fpath, check=False)
    assert_result_count(dropres, 1, path=res[0]['path'], status='ok')
    # ask for the same file again, using 'notneeded' for unavailable paths to
    # try to trigger any difference
    droppedres = ds.annotate_paths(fpath, unavailable_path_status='notneeded')
    # but we get the same result
    eq_(res, droppedres)

    # now try the same on an uninstalled dataset
    subdspath = opj('b', 'bb')
    # before
    before_res = ds.annotate_paths(subdspath,
                                   recursive=True,
                                   unavailable_path_status='error')
    assert_result_count(before_res, 3, status='', type='dataset')
    uninstall_res = ds.uninstall(subdspath, recursive=True, check=False)
    assert_result_count(uninstall_res, 3, status='ok', type='dataset')
    # after
    after_res = ds.annotate_paths(subdspath,
                                  unavailable_path_status='error',
                                  on_failure='ignore')
    # uninstall hides all low-level datasets
    assert_result_count(after_res, 1)
    # but for the top-most uninstalled one it merely reports absent state now
    assert_result_count(after_res,
                        1,
                        state='absent',
                        **{
                            k: before_res[0][k]
                            for k in before_res[0]
                            if k not in ('state', 'status')
                        })
    # however, this beauty doesn't come for free, so it can be disabled,
    # which will make the uninstalled subdataset look like a directory in the
    # parent (or even just a non-existing path, if the mountpoint dir isn't
    # present)
    after_res = ds.annotate_paths(subdspath, force_subds_discovery=False)
    assert_result_count(after_res,
                        1,
                        type='directory',
                        path=before_res[0]['path'],
                        parentds=before_res[0]['parentds'])
    # feed annotated paths back into annotate_paths; a second run shouldn't
    # change anything
    # datasets and a file
    res = ds.annotate_paths(['.', fpath], recursive=True)
    # make a copy, just to be sure
    orig_res = deepcopy(res)
    assert_result_count(res, 7)
    # and in again, no recursion this time
    res_again = ds.annotate_paths(res)
    # doesn't change a thing
    eq_(orig_res, res_again)
    # and in again, with recursion this time
    res_recursion_again = ds.annotate_paths(res, recursive=True)
    assert_result_count(res_recursion_again, 7)
    # doesn't change a thing
    eq_(orig_res, res_recursion_again)
Example 2
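Example 2 is the same test from a later DataLad revision: the only functional
difference from Example 1 is that ds.add('.', recursive=True) became
ds.save(recursive=True), presumably reflecting DataLad's consolidation of add
into save around the 0.12 series. The import sketch given for Example 1
applies here as well.
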
def test_annotate_paths(dspath, nodspath):
    # this test doesn't use the API's `remove` to avoid circularities
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.save(recursive=True)
    ok_clean_git(ds.path)

    with chpwd(dspath):
        # with and without an explicitly given path the result is almost the
        # same inside a dataset
        without_path = annotate_paths(on_failure='ignore')
        pwd_res = annotate_paths(path='.', on_failure='ignore')
        assert_result_count(
            without_path, 1, type='dataset', path=dspath)
        assert_result_count(
            pwd_res, 1, type='dataset', path=dspath, orig_request='.',
            raw_input=True)
        # make sure going into a subdataset vs giving it as a path has no
        # structural impact
        eq_(
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request', 'refds')}
             for ap in annotate_paths(path='b', recursive=True)],
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request', 'refds')}
             for ap in annotate_paths(dataset='b', recursive=True)])
    # now do it again, pointing to the ds directly
    res = ds.annotate_paths(on_failure='ignore')
    # no request, no refds, but otherwise the same
    eq_(len(res), len(pwd_res))
    eq_({k: pwd_res[0][k] for k in pwd_res[0]
         if k in ('path', 'type', 'action', 'status')},
        {k: res[0][k] for k in res[0]
         if k not in ('refds',)})

    # will refuse a path that is not a dataset as refds
    res = annotate_paths(dataset=nodspath, on_failure='ignore')
    assert_result_count(
        res, 1, status='error', path=nodspath,
        message='given reference dataset is not a dataset')

    # recursion with proper base dataset
    parentds = Dataset(opj(dspath, 'a'))
    base_res = parentds.annotate_paths(recursive=True)
    # needs to find 'aa' and the base
    assert_result_count(base_res, 2)
    assert_result_count(base_res, 2, type='dataset')
    assert_result_count(
        base_res, 1, type='dataset', parentds=parentds.path,
        path=opj(parentds.path, 'aa'), status='')
    # same recursion but without a base dataset
    res = annotate_paths(path=opj(dspath, 'a'), recursive=True)
    # needs to find 'aa' and 'a' again
    assert_result_count(res, 2)
    eq_(res[-1],
        {k: base_res[-1][k] for k in base_res[-1]
         if k not in ('refds',)})
    assert_result_count(
        res, 1, type='dataset', status='',
        # it does not auto-discover parent datasets without force or a refds
        #parentds=parentds.path,
        path=parentds.path)
    # but we can force parent discovery
    res = parentds.annotate_paths(
        path=opj(dspath, 'a'), recursive=True, force_parentds_discovery=True)
    assert_result_count(res, 2)
    assert_result_count(
        res, 1, type='dataset', status='', parentds=dspath,
        path=parentds.path)

    # recursion with multiple disjoint seeds, no common base
    eq_([basename(p) for p in annotate_paths(
         path=[opj(dspath, 'a'), opj(dspath, 'b', 'bb', 'bba')], recursive=True,
         result_xfm='paths')],
        ['a', 'aa', 'bba', 'bbaa'])

    # recursion with partially overlapping seeds, no duplicate results
    eq_([basename(p) for p in annotate_paths(
         path=[opj(dspath, 'b'), opj(dspath, 'b', 'bb', 'bba')], recursive=True,
         result_xfm='paths')],
        ['b', 'ba', 'bb', 'bba', 'bbaa'])

    # get straight from a file
    fpath = opj('a', 'aa', 'file_aa')
    res = ds.annotate_paths(fpath)
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, orig_request=fpath, raw_input=True, type='file',
        path=opj(ds.path, fpath), parentds=opj(ds.path, 'a', 'aa'), status='')
    # now drop it
    dropres = ds.drop(fpath, check=False)
    assert_result_count(dropres, 1, path=res[0]['path'], status='ok')
    # ask for the same file again, using 'notneeded' for unavailable paths to
    # try to trigger any difference
    droppedres = ds.annotate_paths(fpath, unavailable_path_status='notneeded')
    # but we get the same result
    eq_(res, droppedres)

    # now try the same on an uninstalled dataset
    subdspath = opj('b', 'bb')
    # before
    before_res = ds.annotate_paths(subdspath, recursive=True,
                                   unavailable_path_status='error')
    assert_result_count(before_res, 3, status='', type='dataset')
    uninstall_res = ds.uninstall(subdspath, recursive=True, check=False)
    assert_result_count(uninstall_res, 3, status='ok', type='dataset')
    # after
    after_res = ds.annotate_paths(subdspath,
                                  unavailable_path_status='error',
                                  on_failure='ignore')
    # uninstall hides all low-level datasets
    assert_result_count(after_res, 1)
    # but for the top-most uninstalled one it merely reports absent state now
    assert_result_count(
        after_res, 1, state='absent',
        **{k: before_res[0][k] for k in before_res[0] if k not in ('state', 'status')})
    # however, this beauty doesn't come for free, so it can be disabled,
    # which will make the uninstalled subdataset look like a directory in the
    # parent (or even just a non-existing path, if the mountpoint dir isn't
    # present)
    after_res = ds.annotate_paths(subdspath, force_subds_discovery=False)
    assert_result_count(
        after_res, 1, type='directory',
        path=before_res[0]['path'],
        parentds=before_res[0]['parentds'])
    # feed annotated paths back into annotate_paths; a second run shouldn't
    # change anything
    # datasets and a file
    res = ds.annotate_paths(['.', fpath], recursive=True)
    # make a copy, just to be sure
    orig_res = deepcopy(res)
    assert_result_count(res, 7)
    # and in again, no recursion this time
    res_again = ds.annotate_paths(res)
    # doesn't change a thing
    eq_(orig_res, res_again)
    # and in again, with recursion this time
    res_recursion_again = ds.annotate_paths(res, recursive=True)
    assert_result_count(res_recursion_again, 7)
    # doesn't change a thing
    eq_(orig_res, res_recursion_again)
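
For orientation, here is a minimal standalone sketch of the command the tests
exercise. It uses only parameters and result keys that appear in the
assertions above; the dataset location is hypothetical, and it assumes a
DataLad release that still provides annotate_paths:

# Minimal usage sketch (hypothetical path; assumes an older DataLad
# release in which annotate_paths is available via datalad.api and as a
# Dataset method, as in the tests above).
from datalad.api import create

ds = create('/tmp/annotate-demo')  # hypothetical demo dataset
# annotate_paths yields one result record per resolved path, with keys
# such as 'path', 'type', 'parentds', 'status', 'raw_input', and
# 'orig_request' (all seen in the assertions above)
for ap in ds.annotate_paths('.', recursive=True):
    print(ap['type'], ap['path'])
# result_xfm='paths' reduces each record to its 'path' value
paths = ds.annotate_paths('.', recursive=True, result_xfm='paths')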