Example #1
def test_result_filter():
    # ensure baseline without filtering
    assert_equal(
        [r['somekey'] for r in TestUtils().__call__(4)],
        [0, 1, 2, 3])
    # test two functionally equivalent ways to filter results
    # 1. Constraint-based -- filter by exception
    #    we have a full set of AND and OR operators for this
    # 2. custom filter function -- filter by boolean return value
    for filt in (
            EnsureKeyChoice('somekey', (0, 2)),
            lambda x: x['somekey'] in (0, 2)):
        assert_equal(
            [r['somekey'] for r in TestUtils().__call__(
                4,
                result_filter=filt)],
            [0, 2])
        # constraint returns full dict
        assert_dict_equal(
            TestUtils().__call__(
                4,
                result_filter=filt)[-1],
            {'action': 'off', 'path': 'some', 'status': 'ok', 'somekey': 2})

    # test more sophisticated filters that actually get to see the
    # API call's kwargs
    def greatfilter(res, **kwargs):
        assert_equal(kwargs.get('dataset', 'bob'), 'awesome')
        return True
    TestUtils().__call__(4, dataset='awesome', result_filter=greatfilter)

    def sadfilter(res, **kwargs):
        assert_equal(kwargs.get('dataset', 'bob'), None)
        return True
    TestUtils().__call__(4, result_filter=sadfilter)
Example #2
def test_extract():
    info, subpaths = au.extract(
        json_stream(ST_DATA["rows"]), "json",
        url_format="{name}_{debut_season}.com",
        filename_format="{age_group}//{now_dead}//{name}.csv")

    eq_(subpaths,
        {"kid", "kid/no", "adult", "adult/yes", "adult/no"})

    eq_([d["url"] for d in info],
        ["will_1.com", "bob_2.com", "scott_1.com", "max_2.com"])

    eq_([d["filename"] for d in info],
        ["kid/no/will.csv", "adult/yes/bob.csv",
         "adult/no/scott.csv", "kid/no/max.csv"])

    expects = [{"name": "will", "age_group": "kid", "debut_season": "1",
                "now_dead": "no"},
               {"name": "bob", "age_group": "adult", "debut_season": "2",
                "now_dead": "yes"},
               {"name": "scott", "age_group": "adult", "debut_season": "1",
                "now_dead": "no"},
               {"name": "max", "age_group": "kid", "debut_season": "2",
                "now_dead": "no"}]
    for d, expect in zip(info, expects):
        assert_dict_equal(d["meta_args"], expect)

    eq_([d["subpath"] for d in info],
        ["kid/no", "adult/yes", "adult/no", "kid/no"])
Example #3
def test_basic_aggregate(path):
    # TODO give datasets some more metadata to actually aggregate stuff
    base = Dataset(opj(path, 'origin')).create(force=True)
    sub = base.create('sub', force=True)
    #base.metadata(sub.path, init=dict(homepage='this'), apply2global=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    # we will first aggregate the middle dataset on its own, this will
    # serve as a smoke test for the reuse of metadata objects later on
    sub.aggregate_metadata()
    base.save()
    assert_repo_status(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    direct_meta = base.metadata(recursive=True, return_type='list')
    # lose the deepest dataset
    sub.uninstall('subsub', check=False)
    # now we should be able to reaggregate metadata, and lose nothing
    # because we can aggregate aggregated metadata of subsub from sub
    base.aggregate_metadata(recursive=True, update_mode='all')
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        print(d['path'], a['path'])
        assert_dict_equal(d, a)
    # now we can throw away the subdataset tree, and lose no metadata
    base.uninstall('sub', recursive=True, check=False)
    assert (not sub.is_installed())
    assert_repo_status(base.path)
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        assert_dict_equal(d, a)
Example #4
def test_get_file_parts():
    assert_dict_equal(au.get_file_parts("file.tar.gz", "prefix"),
                      {"prefix": "file.tar.gz",
                       "prefix_root_py": "file.tar",
                       "prefix_ext_py": ".gz",
                       "prefix_root": "file",
                       "prefix_ext": ".tar.gz"})
Example #5
def test_extract():
    info, subpaths = au.extract(
        json_stream(ST_DATA["rows"]), "json",
        url_format="{name}_{debut_season}.com",
        filename_format="{age_group}//{now_dead}//{name}.csv")

    eq_(subpaths,
        {"kid", "kid/no", "adult", "adult/yes", "adult/no"})

    eq_([d["url"] for d in info],
        ["will_1.com", "bob_2.com", "scott_1.com", "max_2.com"])

    eq_([d["filename"] for d in info],
        ["kid/no/will.csv", "adult/yes/bob.csv",
         "adult/no/scott.csv", "kid/no/max.csv"])

    expects = [{"name": "will", "age_group": "kid", "debut_season": "1",
                "now_dead": "no"},
               {"name": "bob", "age_group": "adult", "debut_season": "2",
                "now_dead": "yes"},
               {"name": "scott", "age_group": "adult", "debut_season": "1",
                "now_dead": "no"},
               {"name": "max", "age_group": "kid", "debut_season": "2",
                "now_dead": "no"}]
    for d, expect in zip(info, expects):
        assert_dict_equal(d["meta_args"], expect)

    eq_([d["subpath"] for d in info],
        ["kid/no", "adult/yes", "adult/no", "kid/no"])
Example #6
def test_basic_aggregate(path):
    # TODO give datasets some more metadata to actually aggregate stuff
    base = Dataset(opj(path, 'origin')).create(force=True)
    sub = base.create('sub', force=True)
    #base.metadata(sub.path, init=dict(homepage='this'), apply2global=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    # we will first aggregate the middle dataset on its own, this will
    # serve as a smoke test for the reuse of metadata objects later on
    sub.aggregate_metadata()
    base.save()
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    direct_meta = base.metadata(recursive=True, return_type='list')
    # lose the deepest dataset
    sub.uninstall('subsub', check=False)
    # now we should be able to reaggregate metadata, and lose nothing
    # because we can aggregate aggregated metadata of subsub from sub
    base.aggregate_metadata(recursive=True, update_mode='all')
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        print(d['path'], a['path'])
        assert_dict_equal(d, a)
    # now we can throw away the subdataset tree, and lose no metadata
    base.uninstall('sub', recursive=True, check=False)
    assert(not sub.is_installed())
    ok_clean_git(base.path)
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        assert_dict_equal(d, a)
Example #7
def test_result_filter():
    # ensure baseline without filtering
    assert_equal([r['somekey'] for r in TestUtils().__call__(4)], [0, 1, 2, 3])
    # test two functionally equivalent ways to filter results
    # 1. Constraint-based -- filter by exception
    #    we have a full set of AND and OR operators for this
    # 2. custom filter function -- filter by boolean return value
    for filt in (EnsureKeyChoice('somekey', (0, 2)), lambda x: x['somekey'] in
                 (0, 2)):
        assert_equal([
            r['somekey'] for r in TestUtils().__call__(4, result_filter=filt)
        ], [0, 2])
        # constraint returns full dict
        assert_dict_equal(TestUtils().__call__(4, result_filter=filt)[-1], {
            'action': 'off',
            'path': 'some',
            'status': 'ok',
            'somekey': 2
        })

    # test more sophisticated filters that actually get to see the
    # API call's kwargs
    def greatfilter(res, **kwargs):
        assert_equal(kwargs.get('dataset', 'bob'), 'awesome')
        return True

    TestUtils().__call__(4, dataset='awesome', result_filter=greatfilter)

    def sadfilter(res, **kwargs):
        assert_equal(kwargs.get('dataset', 'bob'), None)
        return True

    TestUtils().__call__(4, result_filter=sadfilter)
Example #8
def test_basic_aggregate(path):
    # TODO give datasets some more metadata to actually aggregate stuff
    base = Dataset(opj(path, 'origin')).create(force=True)
    sub = base.create('sub', force=True)
    base.metadata(sub.path, init=dict(homepage='this'), apply2global=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True)
    ok_clean_git(base.path)
    direct_meta = base.metadata(recursive=True, return_type='list')
    # lose the deepest dataset
    sub.uninstall('subsub', check=False)
    # now we should be able to reaggregate metadata, and lose nothing
    # because we can aggregate aggregated metadata of subsub from sub
    base.aggregate_metadata(recursive=True)
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        print(d['path'], a['path'])
        assert_dict_equal(d, a)
    # now we can throw away the subdataset tree, and lose no metadata
    base.uninstall('sub', recursive=True, check=False)
    assert (not sub.is_installed())
    ok_clean_git(base.path)
    # same result for aggregate query as for (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        assert_dict_equal(d, a)
Example #9
def test_get_file_parts():
    assert_dict_equal(au.get_file_parts("file.tar.gz", "prefix"),
                      {"prefix": "file.tar.gz",
                       "prefix_root_py": "file.tar",
                       "prefix_ext_py": ".gz",
                       "prefix_root": "file",
                       "prefix_ext": ".tar.gz"})
Example #10
def test_compare_content_info(path):
    # TODO remove when `create` is RF to return the new Dataset
    ds = RevolutionDataset(Dataset(path).create().path)
    assert_repo_status(path)

    # for a clean repo HEAD and worktree query should yield identical results
    wt = ds.repo.get_content_info(ref=None)
    assert_dict_equal(wt, ds.repo.get_content_info(ref='HEAD'))
Example #11
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return int(
                ds.repo.repo.git.rev_list("--count", "git-annex").strip())

        n_annex_commits = get_annex_commit_counts()

        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{name}")

            filenames = ["a", "b", "c"]
            for fname in filenames:
                ok_exists(fname)

            for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                             ["foo", "bar", "foo"]):
                assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

            # Ignore this check if we're faking dates because that disables
            # batch mode.
            if not os.environ.get('DATALAD_FAKE__DATES'):
                # We should have two new commits on the git-annex branch: one for the
                # added urls and one for the added metadata.
                eq_(n_annex_commits + 2, get_annex_commit_counts())

            # Add to already existing links, overwriting.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file,
                           "{url}",
                           "{name}",
                           ifexists="overwrite")
                for fname in filenames:
                    assert_in("Removing {}".format(os.path.join(path, fname)),
                              cml.out)

            # Add to already existing links, skipping.
            assert_in_results(ds.addurls(self.json_file,
                                         "{url}",
                                         "{name}",
                                         ifexists="skip"),
                              action="addurls",
                              status="notneeded")

            # Adding to already existing links works, as long as the content is the same.
            ds.addurls(self.json_file, "{url}", "{name}")

            # But it fails if something has changed.
            ds.unlock("a")
            with open("a", "w") as ofh:
                ofh.write("changed")
            ds.save("a")

            assert_raises(IncompleteResultsError, ds.addurls, self.json_file,
                          "{url}", "{name}")
Example #12
def test_basic_dsmeta(path):
    ds = Dataset(path).create()
    ok_clean_git(path)
    # ensure clean slate
    assert_result_count(ds.metadata(), 0)
    # init
    res = ds.metadata(init=['tag1', 'tag2'], dataset_global=True)
    eq_(res[0]['metadata']['tag'], ['tag1', 'tag2'])
    # init again does nothing
    res = ds.metadata(init=['tag3'], dataset_global=True)
    eq_(res[0]['metadata']['tag'], ['tag1', 'tag2'])
    # reset whole key
    res = ds.metadata(reset=['tag'], dataset_global=True)
    assert_result_count(ds.metadata(), 0)
    # add something arbitrary
    res = ds.metadata(add=dict(dtype=['heavy'], readme=['short', 'long']),
                      dataset_global=True)
    eq_(res[0]['metadata']['dtype'], ['heavy'])
    # sorted!
    eq_(res[0]['metadata']['readme'], ['long', 'short'])
    # supply key definitions, no need for dataset_global
    res = ds.metadata(define_key=dict(mykey='truth'))
    eq_(res[0]['metadata']['definition'], {'mykey': u'truth'})
    # re-supply different key definitions -> error
    res = ds.metadata(define_key=dict(mykey='lie'), on_failure='ignore')
    assert_result_count(
        res,
        1,
        status='error',
        message=("conflicting definition for key '%s': '%s' != '%s'", "mykey",
                 "lie", "truth"))
    res = ds.metadata(define_key=dict(otherkey='altfact'))
    assert_dict_equal(res[0]['metadata']['definition'], {
        'mykey': u'truth',
        'otherkey': 'altfact'
    })
    # 'definition' is a regular key, we can remove items
    res = ds.metadata(remove=dict(definition=['mykey']), dataset_global=True)
    assert_dict_equal(res[0]['metadata']['definition'],
                      {'otherkey': u'altfact'})
    res = ds.metadata(remove=dict(definition=['otherkey']),
                      dataset_global=True)
    # when there are no items left, the key vanishes too
    assert ('definition' not in res[0]['metadata'])
    # we still have metadata, so there is a DB file
    assert (res[0]['metadata'])
    db_path = opj(ds.path, '.datalad', 'metadata', 'dataset.json')
    assert (exists(db_path))
    ok_clean_git(ds.path)
    # but if we remove it, the file is gone
    res = ds.metadata(reset=['readme', 'dtype'], dataset_global=True)
    eq_(res[0]['metadata'], {})
    assert (not exists(db_path))
    ok_clean_git(ds.path)
Example #13
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return int(
                ds.repo.repo.git.rev_list("--count", "git-annex").strip())

        n_annex_commits = get_annex_commit_counts()

        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{name}")

            filenames = ["a", "b", "c"]
            for fname in filenames:
                ok_exists(fname)

            for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                             ["foo", "bar", "foo"]):
                assert_dict_equal(meta,
                                  {"subdir": [subdir], "name": [fname]})

            # Ignore this check if we're faking dates because that disables
            # batch mode.
            if not os.environ.get('DATALAD_FAKE__DATES'):
                # We should have two new commits on the git-annex branch: one for the
                # added urls and one for the added metadata.
                eq_(n_annex_commits + 2, get_annex_commit_counts())

            # Add to already existing links, overwriting.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{name}",
                           ifexists="overwrite")
                for fname in filenames:
                    assert_in("Removing {}".format(os.path.join(path, fname)),
                              cml.out)

            # Add to already existing links, skipping.
            assert_in_results(
                ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
                action="addurls",
                status="notneeded")

            # Adding to already existing links works, as long as the content is the same.
            ds.addurls(self.json_file, "{url}", "{name}")

            # But it fails if something has changed.
            ds.unlock("a")
            with open("a", "w") as ofh:
                ofh.write("changed")
            ds.save("a")

            assert_raises(IncompleteResultsError,
                          ds.addurls,
                          self.json_file, "{url}", "{name}")
Example #14
def test_update_strategy(path):
    base = Dataset(op.join(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(op.join(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = sub.create(op.join('subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    # we start clean
    for ds in base, sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the base dataset only, nothing below changes
    base.meta_aggregate()
    eq_(len(_get_contained_objs(base)), 2)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the entire tree, but by default only updates
    # the top-level dataset with all objects, none of the leaf
    # or intermediate datasets gets touched
    base.meta_aggregate(recursive=True)
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_referenced_objs(base)), 6)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    res = base.meta_dump(reporton='aggregates', recursive=True)
    assert_result_count(res, 3)
    # it is impossible to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status('impossible',
                      ds.meta_dump(reporton='aggregates', on_failure='ignore'))
    # get the full metadata report
    target_meta = _kill_time(base.meta_dump())

    # now redo full aggregation, this time updating all
    # (intermediate) datasets
    base.meta_aggregate(recursive=True, into='all')
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_contained_objs(sub)), 4)
    eq_(len(_get_contained_objs(subsub)), 2)
    # it is now OK to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status('ok',
                      ds.meta_dump(reporton='aggregates', on_failure='ignore'))

    # all of that has no impact on the reported metadata
    # minus the change in the refcommits
    for i in zip(target_meta, _kill_time(base.meta_dump())):
        assert_dict_equal(i[0], i[1])
Example #15
def test_dicom(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'dicom', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'dicom.dcm'),
        path)
    ds.add('.')
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    # query for the file metadata
    res = ds.metadata('dicom.dcm')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['dicom']
    assert_in('@context', meta)
    # no point in testing ALL keys, but we got plenty
    assert (len(meta.keys()) > 70)
    eq_(meta['SeriesDate'], '20070205')

    # now ask for the dataset metadata, which should have both the unique props
    # and a list of imageseries (one in this case, but a list)
    res = ds.metadata(reporton='datasets')
    assert_result_count(res, 1)
    dsmeta = res[0]['metadata']['dicom']
    # same context
    assert_dict_equal(meta['@context'], dsmeta['@context'])
    meta.pop('@context')
    eq_(dsmeta['Series'], [meta])

    # for this artificial case pretty much the same info also comes out as
    # unique props, but wrapped in lists
    ucp = res[0]['metadata']["datalad_unique_content_properties"]['dicom']
    assert_dict_equal(
        {
            k: [v]
            for k, v in dsmeta['Series'][0].items()
            if k not in DicomExtractor._unique_exclude and k in ucp
        }, {
            k: v
            for k, v in ucp.items() if k not in DicomExtractor._unique_exclude
        })

    # but if we switch off file-based metadata storage
    ds.config.add('datalad.metadata.aggregate-content-dicom',
                  'false',
                  where='dataset')
    ds.aggregate_metadata()
    res = ds.metadata(reporton='datasets')

    # the auto-uniquified bits are gone but the Series description stays
    assert_not_in("datalad_unique_content_properties", res[0]['metadata'])
    eq_(dsmeta['Series'], [meta])
Example #16
def test_extract_exclude_autometa_regexp():
    info, _ = au.extract(
        json_stream(ST_DATA["rows"]), "json",
        url_format="{name}_{debut_season}.com",
        filename_format="{age_group}//{now_dead}//{name}.csv",
        exclude_autometa="ea")

    expects = [{"name": "will", "age_group": "kid"},
               {"name": "bob", "age_group": "adult"},
               {"name": "scott", "age_group": "adult"},
               {"name": "max", "age_group": "kid"}]
    for d, expect in zip(info, expects):
        assert_dict_equal(d["meta_args"], expect)
Example #17
def _compare_metadata_helper(origres, compds):
    for ores in origres:
        rpath = relpath(ores['path'], ores['refds'])
        cres = compds.metadata(rpath, reporton='{}s'.format(ores['type']))
        if ores['type'] == 'file':
            # TODO implement file based lookup
            continue
        assert_result_count(cres, 1)
        cres = cres[0]
        assert_dict_equal(ores['metadata'], cres['metadata'])
        if ores['type'] == 'dataset':
            for i in ('dsid', ):
                eq_(ores[i], cres[i])
Example #18
def test_extract_exclude_autometa_regexp():
    info, _ = au.extract(
        json_stream(ST_DATA["rows"]), "json",
        url_format="{name}_{debut_season}.com",
        filename_format="{age_group}//{now_dead}//{name}.csv",
        exclude_autometa="ea")

    expects = [{"name": "will", "age_group": "kid"},
               {"name": "bob", "age_group": "adult"},
               {"name": "scott", "age_group": "adult"},
               {"name": "max", "age_group": "kid"}]
    for d, expect in zip(info, expects):
        assert_dict_equal(d["meta_args"], expect)
Example #19
def _compare_metadata_helper(origres, compds):
    for ores in origres:
        rpath = op.relpath(ores['path'], ores['refds'])
        cres = compds.meta_dump(rpath, reporton='{}s'.format(ores['type']))
        if ores['type'] == 'file':
            # TODO implement file based lookup
            continue
        assert_result_count(cres, 1)
        cres = cres[0]
        assert_dict_equal(ores['metadata'], cres['metadata'])
        if ores['type'] == 'dataset':
            eq_(_get_dsid_from_core_metadata(ores['metadata']['metalad_core']),
                _get_dsid_from_core_metadata(cres['metadata']['metalad_core']))
Example #20
def test_discover_ds_trace(path, otherdir):
    ds = make_demo_hierarchy_datasets(
        path,
        {k: v for k, v in demo_hierarchy.items() if k in ['a', 'd']})
    a = opj(ds.path, 'a')
    aa = opj(a, 'aa')
    d = opj(ds.path, 'd')
    db = opj(d, 'db')
    # we have to check whether we get the correct hierarchy, as the test
    # subject is also involved in this
    assert_true(exists(opj(db, 'file_db')))
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
    # now two datasets which are not available locally, but we
    # know about them (e.g. from metadata)
    dba = opj(db, 'sub', 'dba')
    dbaa = opj(dba, 'subsub', 'dbaa')
    for input, eds, goal in (
            ([], None, {}),
            ([ds.path], None, {}),
            ([otherdir], None, {}),
            ([opj(ds.path, 'nothere')], None, {}),
            ([opj(d, 'nothere')], None, {}),
            ([opj(db, 'nothere')], None, {}),
            ([a], None,
             {ds.path: set([a])}),
            ([aa, a], None,
             {ds.path: set([a]), a: set([aa])}),
            ([db], None,
             {ds.path: set([d]), d: set([db])}),
            ([opj(db, 'file_db')], None,
             {ds.path: set([d]), d: set([db])}),
            # just a regular non-existing path
            ([dba], None, {}),
            # but if we inject this knowledge it must come back out
            # as the child of the closest existing dataset
            ([dba], [dba],
             {ds.path: set([d]), d: set([db]), db: set([dba])}),
            # regardless of the depth
            ([dbaa], [dbaa],
             {ds.path: set([d]), d: set([db]), db: set([dbaa])}),
            ([dba, dbaa], [dba, dbaa],
             {ds.path: set([d]), d: set([db]), db: set([dba, dbaa])}),
            # we can simply add existing and non-existing datasets to the
            # include list to get the desired result
            ([d, dba, dbaa], [d, dba, dbaa],
             {ds.path: set([d]), d: set([db]), db: set([dba, dbaa])}),
    ):
        spec = {}
        discover_dataset_trace_to_targets(ds.path, input, [], spec, includeds=eds)
        assert_dict_equal(spec, goal)
Example #21
def test_discover_ds_trace(path, otherdir):
    ds = make_demo_hierarchy_datasets(
        path,
        {k: v for k, v in demo_hierarchy.items() if k in ['a', 'd']})
    a = opj(ds.path, 'a')
    aa = opj(a, 'aa')
    d = opj(ds.path, 'd')
    db = opj(d, 'db')
    # we have to check whether we get the correct hierarchy, as the test
    # subject is also involved in this
    assert_true(exists(opj(db, 'file_db')))
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
    # now two datasets which are not available locally, but we
    # know about them (e.g. from metadata)
    dba = opj(db, 'sub', 'dba')
    dbaa = opj(dba, 'subsub', 'dbaa')
    for input, eds, goal in (
            ([], None, {}),
            ([ds.path], None, {}),
            ([otherdir], None, {}),
            ([opj(ds.path, 'nothere')], None, {}),
            ([opj(d, 'nothere')], None, {}),
            ([opj(db, 'nothere')], None, {}),
            ([a], None,
             {ds.path: set([a])}),
            ([aa, a], None,
             {ds.path: set([a]), a: set([aa])}),
            ([db], None,
             {ds.path: set([d]), d: set([db])}),
            ([opj(db, 'file_db')], None,
             {ds.path: set([d]), d: set([db])}),
            # just a regular non-existing path
            ([dba], None, {}),
            # but if we inject this knowledge it must come back out
            # as the child of the closest existing dataset
            ([dba], [dba],
             {ds.path: set([d]), d: set([db]), db: set([dba])}),
            # regardless of the depth
            ([dbaa], [dbaa],
             {ds.path: set([d]), d: set([db]), db: set([dbaa])}),
            ([dba, dbaa], [dba, dbaa],
             {ds.path: set([d]), d: set([db]), db: set([dba, dbaa])}),
            # we can simply add existing and non-existing datasets to the
            # include list to get the desired result
            ([d, dba, dbaa], [d, dba, dbaa],
             {ds.path: set([d]), d: set([db]), db: set([dba, dbaa])}),
    ):
        spec = {}
        discover_dataset_trace_to_targets(ds.path, input, [], spec, includeds=eds)
        assert_dict_equal(spec, goal)
Example #22
def test_assert_dict_equal():
    assert_dict_equal({}, {})
    assert_dict_equal({"a": 3}, {"a": 3})
    assert_raises(AssertionError, assert_dict_equal, {1: 3}, {1: 4})
    assert_raises(AssertionError, assert_dict_equal, {1: 3}, {2: 4})
    assert_raises(AssertionError, assert_dict_equal, {1: 3}, {2: 4, 1: 3})
    assert_raises(AssertionError, assert_dict_equal, {1: 3}, {2: 4, 1: 'a'})
    try:
        import numpy as np
    except:  # pragma: no cover
        raise SkipTest("need numpy for this tiny one")
    # one is scalar another one array
    assert_raises(AssertionError, assert_dict_equal, {1: 0}, {1: np.arange(1)})
    assert_raises(AssertionError, assert_dict_equal, {1: 0}, {1: np.arange(3)})
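This test only pins down observable behavior: equal dicts pass, differing keys or values raise AssertionError, and a scalar compared against a numpy array must also fail. A minimal stand-in that satisfies exactly these checks (hypothetical; DataLad's assert_dict_equal additionally reports a readable diff):

def assert_dict_equal_sketch(d1, d2):
    # hypothetical minimal stand-in, sufficient for the checks above
    assert set(d1) == set(d2), "key sets differ: %r vs %r" % (set(d1), set(d2))
    for k in d1:
        v1, v2 = d1[k], d2[k]
        try:
            # require an unambiguous result and matching types, so that
            # scalar-vs-numpy-array comparisons do not silently pass
            same = bool(v1 == v2) and type(v1) is type(v2)
        except Exception:
            same = False
        assert same, "values differ for key %r: %r != %r" % (k, v1, v2)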
Example #23
def test_compare_content_info(path):
    # TODO remove when `create` is RF to return the new Dataset
    ds = Dataset(path).create()
    assert_repo_status(path)

    # for a clean repo HEAD and worktree query should yield identical results
    # minus a 'bytesize' report that is readily available for HEAD, but would
    # require a stat call per file for the worktree, and is not done ATM
    wt = ds.repo.get_content_info(ref=None)
    assert_dict_equal(
        wt,
        {f: {k: v for k, v in iteritems(p) if k != 'bytesize'}
         for f, p in iteritems(ds.repo.get_content_info(ref='HEAD'))}
    )
Example #24
def test_compare_content_info(path):
    # TODO remove when `create` is RF to return the new Dataset
    ds = Dataset(path).create()
    assert_repo_status(path)

    # for a clean repo HEAD and worktree query should yield identical results
    # minus a 'bytesize' report that is readily available for HEAD, but would
    # require a stat call per file for the worktree, and is not done ATM
    wt = ds.repo.get_content_info(ref=None)
    assert_dict_equal(
        wt,
        {f: {k: v for k, v in p.items() if k != 'bytesize'}
         for f, p in ds.repo.get_content_info(ref='HEAD').items()}
    )
Example #25
def test_get_url_parts():
    eq_(au.get_url_parts(""), {})
    assert_dict_equal(au.get_url_parts("http://datalad.org"),
                      {"_url_hostname": "datalad.org"})

    assert_dict_equal(
        au.get_url_parts("http://datalad.org/about.html"), {
            "_url_hostname": "datalad.org",
            "_url0": "about.html",
            "_url_basename": "about.html",
            "_url_basename_root_py": "about",
            "_url_basename_ext_py": ".html",
            "_url_basename_root": "about",
            "_url_basename_ext": ".html"
        })
    assert_dict_equal(au.get_url_parts("http://datalad.org/about.html"),
                      au.get_url_parts("http://datalad.org//about.html"))

    assert_dict_equal(
        au.get_url_parts("http://datalad.org/for/git-users"), {
            "_url_hostname": "datalad.org",
            "_url0": "for",
            "_url1": "git-users",
            "_url_basename": "git-users",
            "_url_basename_root_py": "git-users",
            "_url_basename_ext_py": "",
            "_url_basename_root": "git-users",
            "_url_basename_ext": ""
        })
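The asserted keys (hostname, positional path components _url0/_url1, and the two basename splits) can be approximated with urllib.parse; a hypothetical sketch of au.get_url_parts' observable behavior here, not its actual code:

from os.path import splitext
from urllib.parse import urlsplit

def url_parts_sketch(url):
    # hypothetical approximation of the keys exercised above
    parts = urlsplit(url)
    if not parts.netloc:
        return {}
    info = {"_url_hostname": parts.netloc}
    comps = [c for c in parts.path.split("/") if c]
    for i, comp in enumerate(comps):
        info["_url{}".format(i)] = comp
    if comps:
        basename = comps[-1]
        root_py, ext_py = splitext(basename)
        root_full = basename.split(".", 1)[0]
        info.update({"_url_basename": basename,
                     "_url_basename_root_py": root_py,
                     "_url_basename_ext_py": ext_py,
                     "_url_basename_root": root_full,
                     "_url_basename_ext": basename[len(root_full):]})
    return info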
Example #26
def test_get_url_parts():
    eq_(au.get_url_parts(""), {})
    assert_dict_equal(au.get_url_parts("http://datalad.org"),
                      {"_url_hostname": "datalad.org"})

    assert_dict_equal(au.get_url_parts("http://datalad.org/about.html"),
                      {"_url_hostname": "datalad.org",
                       "_url0": "about.html",
                       "_url_basename": "about.html",
                       "_url_basename_root_py": "about",
                       "_url_basename_ext_py": ".html",
                       "_url_basename_root": "about",
                       "_url_basename_ext": ".html"})
    assert_dict_equal(au.get_url_parts("http://datalad.org/about.html"),
                      au.get_url_parts("http://datalad.org//about.html"))

    assert_dict_equal(
        au.get_url_parts("http://datalad.org/for/git-users"),
        {"_url_hostname": "datalad.org",
         "_url0": "for",
         "_url1": "git-users",
         "_url_basename": "git-users",
         "_url_basename_root_py": "git-users",
         "_url_basename_ext_py": "",
         "_url_basename_root": "git-users",
         "_url_basename_ext": ""})
Example #27
def _compare_metadata_helper(origres, compds):
    for ores in origres:
        rpath = relpath(ores['path'], ores['refds'])
        cres = compds.metadata(
            rpath,
            reporton='{}s'.format(ores['type']))
        if ores['type'] == 'file':
            # TODO implement file based lookup
            continue
        assert_result_count(cres, 1)
        cres = cres[0]
        assert_dict_equal(ores['metadata'], cres['metadata'])
        if ores['type'] == 'dataset':
            for i in ('dsid', ):
                eq_(ores[i], cres[i])
Example #28
def test_check_dates(path):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".")
        ar.commit()

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo],
                        reference_date=refdate,
                        return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")
Example #29
def test_check_dates(path):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".")
        ar.commit()

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo], reference_date=refdate, return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")
Example #30
def test_rerun_commit_message_check():
    assert_raises(ValueError,
                  get_run_info,
                  None,
                  """\
[DATALAD RUNCMD] no command

=== Do not change lines below ===
{
 "pwd": ".",
 "exit": 0
}
^^^ Do not change lines above ^^^""")

    assert_raises(ValueError,
                  get_run_info,
                  None,
                  """\
[DATALAD RUNCMD] junk json

=== Do not change lines below ===
{
 "pwd": ".,
 "cmd": "echo ok >okfile",
 "exit": 0
}
^^^ Do not change lines above ^^^""")

    subject, info = get_run_info(
        None,
        """\
[DATALAD RUNCMD] fine

=== Do not change lines below ===
{
 "pwd": ".",
 "cmd": "echo ok >okfile",
 "exit": 0
}
^^^ Do not change lines above ^^^""")
    eq_(subject, "fine")
    assert_dict_equal(info,
                      {"pwd": ".", "cmd": "echo ok >okfile", "exit": 0})
Example #31
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{name}")

            filenames = ["a", "b", "c"]
            for fname in filenames:
                ok_exists(fname)

            for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                             ["foo", "bar", "foo"]):
                assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

            # Add to already existing links, overwriting.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file,
                           "{url}",
                           "{name}",
                           ifexists="overwrite")
                for fname in filenames:
                    assert_in("Removing {}".format(os.path.join(path, fname)),
                              cml.out)

            # Add to already existing links, skipping.
            assert_in_results(ds.addurls(self.json_file,
                                         "{url}",
                                         "{name}",
                                         ifexists="skip"),
                              action="addurls",
                              status="notneeded")

            # Adding to already existing links works, as long as the content is the same.
            ds.addurls(self.json_file, "{url}", "{name}")

            # But it fails if something has changed.
            ds.unlock("a")
            with open("a", "w") as ofh:
                ofh.write("changed")
            ds.add("a")

            assert_raises(IncompleteResultsError, ds.addurls, self.json_file,
                          "{url}", "{name}")
Example #32
def test_rerun_commit_message_check():
    assert_raises(ValueError,
                  get_run_info,
                  None,
                  """\
[DATALAD RUNCMD] no command

=== Do not change lines below ===
{
 "pwd": ".",
 "exit": 0
}
^^^ Do not change lines above ^^^""")

    assert_raises(ValueError,
                  get_run_info,
                  None,
                  """\
[DATALAD RUNCMD] junk json

=== Do not change lines below ===
{
 "pwd": ".,
 "cmd": "echo ok >okfile",
 "exit": 0
}
^^^ Do not change lines above ^^^""")

    subject, info = get_run_info(
        None,
        """\
[DATALAD RUNCMD] fine

=== Do not change lines below ===
{
 "pwd": ".",
 "cmd": "echo ok >okfile",
 "exit": 0
}
^^^ Do not change lines above ^^^""")
    eq_(subject, "fine")
    assert_dict_equal(info,
                      {"pwd": ".", "cmd": "echo ok >okfile", "exit": 0})
Example #33
def test_nested_metadata(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.aggregate_metadata()
    # BIDS returns participant info as a nested dict for each file in the
    # content metadata. On the dataset-level this should automatically
    # yield a sequence of participant info dicts, without any further action
    # or BIDS-specific configuration
    meta = ds.metadata('.', reporton='datasets',
                       return_type='item-or-list')['metadata']
    for i in zip(
            sorted(
                meta['datalad_unique_content_properties']['bids']['subject'],
                key=lambda x: x['id']),
            sorted([{
                "age(years)": "20-25",
                "id": "03",
                "gender": "female",
                "handedness": "r",
                "hearing_problems_current": "n",
                "language": "english"
            }, {
                "age(years)": "30-35",
                "id": "01",
                "gender": 'n/a',
                "handedness": "r",
                "hearing_problems_current": "n",
                "language": u"русский"
            }],
                   key=lambda x: x['id'])):
        assert_dict_equal(i[0], i[1])
    # we can turn off this kind of auto-summary
    ds.config.add('datalad.metadata.generate-unique-bids',
                  'false',
                  where='dataset')
    ds.aggregate_metadata()
    meta = ds.metadata('.', reporton='datasets',
                       return_type='item-or-list')['metadata']
    # protect next test a little, in case we enhance our core extractor in the future
    # to provide more info
    if 'datalad_unique_content_properties' in meta:
        assert_not_in('bids', meta['datalad_unique_content_properties'])
Example #34
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path,
                     source=origin,
                     result_xfm='datasets',
                     return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source,
                       name="local_target",
                       sshurl="ssh://localhost:22",
                       target_dir=target_path,
                       ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(dataset=source,
                                   name="local_target_alt",
                                   sshurl="ssh://localhost",
                                   target_dir=target_path)
    ok_(
        text_type(cm.exception).startswith(
            "Target path %s already exists. And it fails to rmdir" %
            target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME.  To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows an absolute path is not URL conformant. But this way it is
    # easy to test that the ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(dataset=source,
                                   name="local_target",
                                   sshurl="ssh://localhost" + target_path,
                                   publish_by_default='master',
                                   existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have an easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'),
            '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git versions the behavior has changed
        # a bit and the index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
Example #35
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path, source=origin)

    target_path = opj(target_rootpath, "basic")
    # it will try to fetch it, so it would fail as well since sshurl is wrong
    with swallow_logs(new_level=logging.ERROR) as cml, \
        assert_raises(GitCommandError):
        create_sibling(dataset=source,
                       target="local_target",
                       sshurl="ssh://localhost",
                       target_dir=target_path,
                       ui=True)
    # is not actually happening on one of the two basic cases -- TODO figure it out
    # assert_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    eq_("ssh://localhost", source.repo.get_remote_url("local_target"))
    # should NOT be able to push now, since url isn't correct:
    # TODO:  assumption is wrong if ~ does have .git! fix up!
    assert_raises(GitCommandError, publish, dataset=source, to="local_target")

    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # for some reason this was "correct"
        # eq_(local_target_cfg('annex-ignore'), 'false')
        # but after fixing creating siblings in
        # 21f6dd012b2c7b9c0b8b348dcfb3b0ace7e8b2ec it started to fail
        # I think it is legit since we are trying to fetch now before calling
        # annex.enable_remote so it doesn't set it up, and fails before
        assert_raises(Exception, local_target_cfg, 'annex-ignore')
        # hm, but ATM wouldn't get a uuid since url is wrong
        assert_raises(Exception, local_target_cfg, 'annex-uuid')

    # do it again without force:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(dataset=source,
                                   target="local_target",
                                   sshurl="ssh://localhost",
                                   target_dir=target_path)
    eq_("Target directory %s already exists." % target_path, str(cm.exception))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        ok_endswith(target_description, target_path)
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows an absolute path is not URL conformant. But this way it is
    # easy to test that the ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(dataset=source,
                                   target="local_target",
                                   sshurl="ssh://localhost" + target_path,
                                   existing='replace')
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have an easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect the files expected to be touched (mtime changed) without any content change
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'),
            '.git/objects/info/packs'
        }
        if external_versions['cmd:system-git'] >= '2.4':
            # on elderly git we don't change the receive setting
            ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        assert_set_equal(modified_files, ok_modified_files)
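
For orientation: the comparison above only needs two mappings keyed by path within the target -- one with content digests, one with modification times. A minimal sketch of such a helper (illustrative only; the name and the use of MD5 are assumptions, not DataLad's actual get_mtimes_and_digests):

import hashlib
import os

def sketch_mtimes_and_digests(top):
    """Walk `top` and return ({relpath: md5hex}, {relpath: mtime})."""
    digests, mtimes = {}, {}
    for root, _dirs, files in os.walk(top):
        for name in files:
            full = os.path.join(root, name)
            rel = os.path.relpath(full, top)
            with open(full, 'rb') as f:
                digests[rel] = hashlib.md5(f.read()).hexdigest()
            mtimes[rel] = os.stat(full).st_mtime
    return digests, mtimes
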
Beispiel #36
0
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return len(ds.repo.get_revisions("git-annex"))

        n_annex_commits = get_annex_commit_counts()

        # Also test that we can specify the path relative to the top of the
        # dataset, as paths are generally treated in the Python API; that
        # relative path is what gets recorded in the commit message.
        json_file = op.relpath(self.json_file, ds.path)

        ds.addurls(json_file,
                   "{url}",
                   "{name}",
                   exclude_autometa="(md5sum|size)")
        ok_startswith(ds.repo.format_commit('%b', DEFAULT_BRANCH),
                      f"url_file='{json_file}'")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(op.join(ds.path, fname))

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        # Also ignore on Windows, where a git-annex bug seems to lead to
        # separate metadata commits:
        # https://github.com/datalad/datalad/pull/5202#discussion_r535429704
        if not (dl_cfg.get('datalad.fake-dates') or on_windows):
            # We should have two new commits on the git-annex branch: one for
            # the added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{name}", ifexists="overwrite")
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(ds.addurls(self.json_file,
                                     "{url}",
                                     "{name}",
                                     ifexists="skip"),
                          action="addurls",
                          status="notneeded")

        # Adding to already existing links works, as long as the content is the same.
        ds.addurls(self.json_file, "{url}", "{name}")

        # But it fails if something has changed.
        ds.unlock("a")
        with open(op.join(ds.path, "a"), "w") as ofh:
            ofh.write("changed")
        ds.save("a")

        assert_raises(IncompleteResultsError, ds.addurls, self.json_file,
                      "{url}", "{name}")
Beispiel #37
0
def test_dicom(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'dicom', where='dataset')
    copy(
        op.join(op.dirname(op.dirname(op.dirname(__file__))), 'tests', 'data',
                'files', 'dicom.dcm'), path)
    ds.save()
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    # query for the file metadata
    res = ds.metadata('dicom.dcm')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['dicom']
    assert_in('@context', meta)
    # no point in testing ALL keys, but we got plenty
    assert (len(meta.keys()) > 70)
    eq_(meta['SeriesDate'], '20070205')
    # Actually a tricky one of the dcm.multival.MultiValue type
    # which we should extract as a list
    # https://github.com/datalad/datalad-neuroimaging/issues/49
    eq_(meta['ImageType'], ['ORIGINAL', 'PRIMARY', 'EPI', 'NONE'])
    # make sure we have PatientName -- this is not a basic data type, but
    # dicom.valuerep.PersonName3 -- conversion should have handled that.
    # We can only test that the key is there, since the source dicom has an
    # empty string as the value
    eq_(meta['PatientName'], '')

    # now ask for the dataset metadata, which should have both the unique props
    # and a list of imageseries (one in this case, but a list)
    res = ds.metadata(reporton='datasets')
    assert_result_count(res, 1)
    dsmeta = res[0]['metadata']['dicom']
    # same context
    assert_dict_equal(meta['@context'], dsmeta['@context'])
    meta.pop('@context')
    seriesmeta = dsmeta['Series']
    eq_(seriesmeta[0].pop('SeriesDirectory'), op.curdir)
    eq_(dsmeta['Series'], [meta])

    # for this artificial case pretty much the same info also comes out as
    # unique props, but wrapped in lists
    ucp = res[0]['metadata']["datalad_unique_content_properties"]['dicom']
    assert_dict_equal(
        {
            k: [v]
            for k, v in dsmeta['Series'][0].items()
            if k not in DicomExtractor._unique_exclude and k in ucp
        }, {
            k: v
            for k, v in ucp.items() if k not in DicomExtractor._unique_exclude
        })

    # but, if we switch off file-based metadata storage
    ds.config.add('datalad.metadata.aggregate-content-dicom',
                  'false',
                  where='dataset')
    ds.aggregate_metadata()
    res = ds.metadata(reporton='datasets')

    if not datalad_extracts_annex_key:
        # the auto-uniquified bits are gone but the Series description stays
        assert_not_in("datalad_unique_content_properties", res[0]['metadata'])
    eq_(dsmeta['Series'], [meta])
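
The comparison between dsmeta['Series'] and datalad_unique_content_properties above boils down to: for every non-excluded key, the unique values seen across files are reported as a list. A sketch of that relationship (illustrative; not the extractor's real code):

def sketch_unique_props(per_file_metadata, exclude=()):
    """Collect, per key, the unique values seen across all files, as lists."""
    unique = {}
    for meta in per_file_metadata:
        for key, value in meta.items():
            if key in exclude:
                continue
            values = unique.setdefault(key, [])
            if value not in values:
                values.append(value)
    return unique

# with a single DICOM file every entry is a one-element list, e.g.
# sketch_unique_props([{'SeriesDate': '20070205'}]) == {'SeriesDate': ['20070205']}
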
Beispiel #38
0
def test_basic_dsmeta(path):
    ds = Dataset(path).create()
    ok_clean_git(path)
    # ensure clean slate
    res = ds.metadata(reporton='datasets')
    assert_result_count(res, 1)
    _assert_metadata_empty(res[0]['metadata'])
    # init
    res = ds.metadata(init=['tag1', 'tag2'], apply2global=True)
    eq_(res[0]['metadata']['tag'], ['tag1', 'tag2'])
    # init again does nothing
    res = ds.metadata(init=['tag3'], apply2global=True)
    eq_(res[0]['metadata']['tag'], ['tag1', 'tag2'])
    # reset whole key
    ds.metadata(reset=['tag'], apply2global=True)
    res = ds.metadata(reporton='datasets')
    assert_result_count(res, 1)
    _assert_metadata_empty(res[0]['metadata'])
    # add something arbitrary
    res = ds.metadata(add=dict(dtype=['heavy'], readme=['short', 'long']),
                      apply2global=True,
                      on_failure='ignore')
    # fails due to unknown keys
    assert_status('error', res)
    res = ds.metadata(add=dict(dtype=['heavy'], readme=['short', 'long']),
                      define_key=dict(dtype='is_a_datatype',
                                      readme='is_readme_content'),
                      apply2global=True)

    eq_(res[0]['metadata']['dtype'], 'heavy')
    # sorted!
    eq_(res[0]['metadata']['readme'], ['long', 'short'])
    # check it reports common keys
    with swallow_outputs() as cmo:
        ds.metadata(show_keys=True)
        assert_in('license', cmo.out)
    # supply key definitions, no need for apply2global
    res = ds.metadata(define_key=dict(mykey='truth'))
    eq_(res[0]['metadata']['definition']['mykey'], u'truth')
    with swallow_outputs() as cmo:
        ds.metadata(show_keys=True)
        assert_in('mykey: truth (dataset: {})'.format(ds.path), cmo.out)
    # re-supply different key definitions -> error
    res = ds.metadata(define_key=dict(mykey='lie'), on_failure='ignore')
    assert_result_count(
        res,
        1,
        status='error',
        message=("conflicting definition for key '%s': '%s' != '%s'", "mykey",
                 "lie", "truth"))
    res = ds.metadata(define_key=dict(otherkey='altfact'))
    eq_(res[0]['metadata']['definition']['otherkey'], 'altfact')
    # 'definition' is a regular key, we can remove items
    res = ds.metadata(remove=dict(definition=['mykey']), apply2global=True)
    assert_dict_equal(
        res[0]['metadata']['definition'], {
            'otherkey': u'altfact',
            'readme': u'is_readme_content',
            'dtype': u'is_a_datatype'
        })
    res = ds.metadata(remove=dict(definition=['otherkey', 'readme', 'dtype']),
                      apply2global=True)
    # when there are no items left, the key vanishes too
    assert ('definition' not in res[0]['metadata'])
    # we still have metadata, so there is a DB file
    assert (res[0]['metadata'])
    db_path = opj(ds.path, '.datalad', 'metadata', 'dataset.json')
    assert (exists(db_path))
    ok_clean_git(ds.path)
    # but if we remove it, the file is gone
    res = ds.metadata(reset=['readme', 'dtype'], apply2global=True)
    eq_(res[0]['metadata'], {})
    assert (not exists(db_path))
    ok_clean_git(ds.path)
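
As a rough summary of the verbs exercised above, the dataset-level metadata behaves like a dict of sorted value lists: init only fills absent keys, add merges and sorts, remove drops individual items (and the key once it is empty), reset drops whole keys. A heavily simplified sketch of that observable behaviour (not DataLad's implementation; singleton lists may additionally be reported unwrapped, as with 'dtype' above):

def sketch_metadata_edit(db, init=None, add=None, remove=None, reset=None):
    """Apply init/add/remove/reset to a plain dict of value lists."""
    for key, values in (init or {}).items():
        db.setdefault(key, sorted(values))    # only takes effect if key is absent
    for key, values in (add or {}).items():
        db[key] = sorted(set(db.get(key, [])) | set(values))
    for key, values in (remove or {}).items():
        left = [v for v in db.get(key, []) if v not in values]
        if left:
            db[key] = left
        else:
            db.pop(key, None)                 # an emptied key vanishes
    for key in (reset or []):
        db.pop(key, None)                     # drop the whole key
    return db

# e.g. sketch_metadata_edit({}, init={'tag': ['tag1', 'tag2']}) == {'tag': ['tag1', 'tag2']}
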
Beispiel #39
0
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(
        src_path, source=origin,
        result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And the target should have a known UUID recorded in the source, if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            name="local_target_alt",
            sshurl="ssh://localhost",
            target_dir=target_path)
    ok_(text_type(cm.exception).startswith(
        "Target path %s already exists. And it fails to rmdir" % target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things get tricky: then
        # target_path is shortened and we would need to know the remote $HOME.
        # To avoid over-complicating things while still testing, check only
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on Windows an absolute path is not URL-conformant. But this way it
    # is easy to test that the ssh path is used correctly.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if a "compatible" datalad is installed
                # on the remote end. ATM we don't have an easy way to guarantee
                # that AFAIK (yoh), so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect the files expected to be touched (mtime changed) without any content change
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        # on elderly git we don't change the receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update({f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git versions the behavior has changed
        # a bit and the index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
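
The expected set of modified files reflects what a post-update hook refreshes on the target: .git/info/refs and .git/objects/info/packs come from git update-server-info, while the entries under .git/datalad/web are assumed to come from DataLad's own additions to the hook. A sketch of the git part only (illustrative):

import subprocess

def sketch_post_update(target_path):
    """Refresh the files git needs for its 'dumb' HTTP transport."""
    subprocess.run(["git", "update-server-info"], cwd=target_path, check=True)
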
Beispiel #40
0
def test_custom_native_merge(path):
    ds = Dataset(path).create(force=True)
    # no metadata, because nothing is committed
    _assert_metadata_empty(
        ds.metadata(reporton='datasets',
                    result_xfm='metadata',
                    return_type='item-or-list'))
    # enable BIDS metadata, BIDS metadata should become THE metadata
    ds.config.add('datalad.metadata.nativetype', 'bids', where='dataset')
    ds.aggregate_metadata()
    # no metadata, because still nothing is committed
    _assert_metadata_empty(
        ds.metadata(reporton='datasets',
                    result_xfm='metadata',
                    return_type='item-or-list'))
    ds.add('.')
    ds.aggregate_metadata()
    meta = ds.metadata(reporton='datasets',
                       result_xfm='metadata',
                       return_type='item-or-list')
    _clean_meta(meta)
    assert_dict_equal({'name': u'myds', 'author': ['one', 'two']}, meta)
    # now give the ds a custom name, must override the native one
    # but authors still come from BIDS
    ds.metadata(apply2global=True, add=dict(name='mycustom'))
    meta = ds.metadata(reporton='datasets',
                       result_xfm='metadata',
                       return_type='item-or-list')
    _clean_meta(meta)
    assert_dict_equal({'name': u'mycustom', 'author': ['one', 'two']}, meta)
    # we can disable the merge
    meta = ds.metadata(reporton='datasets',
                       merge_native='none',
                       result_xfm='metadata',
                       return_type='item-or-list')
    _clean_meta(meta)
    assert_dict_equal({'name': u'mycustom'}, meta)
    # we can accumulate values
    meta = ds.metadata(reporton='datasets',
                       merge_native='add',
                       result_xfm='metadata',
                       return_type='item-or-list')
    _clean_meta(meta)
    assert_dict_equal({
        'name': ['mycustom', 'myds'],
        'author': ['one', 'two']
    }, meta)
    # we can have native override custom (not sure when needed, though)
    # add one more custom to make visible
    ds.metadata(apply2global=True, init=dict(homepage='fresh'))
    meta = ds.metadata(reporton='datasets',
                       merge_native='reset',
                       result_xfm='metadata',
                       return_type='item-or-list')
    _clean_meta(meta)
    assert_dict_equal(
        {
            'name': u'myds',
            'author': ['one', 'two'],
            'homepage': u'fresh'
        }, meta)
    # enable an additional metadata source
    ds.config.add('datalad.metadata.nativetype',
                  'frictionless_datapackage',
                  where='dataset')
    # we need to reaggregate after the config change
    ds.aggregate_metadata(merge_native='add')
    meta = ds.metadata(reporton='datasets',
                       merge_native='add',
                       result_xfm='metadata',
                       return_type='item-or-list')
    _clean_meta(meta)
    assert_dict_equal(
        {
            'name': ['mycustom', 'myds', 'someother'],
            'author': ['one', 'two'],
            'homepage': u'fresh'
        }, meta)
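
The three merge_native modes used above can be read as plain dict operations on the custom (global) and native (extractor) metadata: 'none' ignores native, 'reset' lets native win per key, and 'add' accumulates values into sorted lists. A sketch that mirrors only what the assertions check, not DataLad's real merge logic:

def sketch_merge_native(custom, native, mode):
    """Merge native metadata into custom metadata according to `mode`."""
    if mode == 'none':
        return dict(custom)                   # native metadata is ignored
    merged = dict(custom)
    if mode == 'reset':
        merged.update(native)                 # native wins on conflicting keys
        return merged
    if mode == 'add':
        for key, value in native.items():
            if key in merged:
                have = merged[key] if isinstance(merged[key], list) else [merged[key]]
                new = value if isinstance(value, list) else [value]
                merged[key] = sorted(set(have) | set(new))
            else:
                merged[key] = value
        return merged
    raise ValueError("unknown merge mode: %s" % mode)
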
Beispiel #41
0
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path, source=origin)

    target_path = opj(target_rootpath, "basic")
    # it will try to fetch it, so it would fail as well since the sshurl is wrong
    with swallow_logs(new_level=logging.ERROR) as cml, \
            assert_raises(GitCommandError):
        create_sibling(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
        # this is not actually happening in one of the two basic cases -- TODO figure it out
        # assert_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    eq_("ssh://localhost", source.repo.get_remote_url("local_target"))
    # should NOT be able to push now, since url isn't correct:
    # TODO:  assumption is wrong if ~ does have .git! fix up!
    assert_raises(GitCommandError, publish, dataset=source, to="local_target")

    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And the target should have a known UUID recorded in the source, if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # for some reason this was "correct"
        # eq_(local_target_cfg('annex-ignore'), 'false')
        # but after fixing the creation of siblings in
        # 21f6dd012b2c7b9c0b8b348dcfb3b0ace7e8b2ec it started to fail.
        # I think that is legit, since we now try to fetch before calling
        # annex.enable_remote, so it doesn't set it up and fails beforehand
        assert_raises(Exception, local_target_cfg, 'annex-ignore')
        # hm, but ATM wouldn't get a uuid since url is wrong
        assert_raises(Exception, local_target_cfg, 'annex-uuid')

    # do it again without force:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path)
    eq_("Target directory %s already exists." % target_path,
        str(cm.exception))
    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        ok_endswith(target_description, target_path)
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on Windows an absolute path is not URL-conformant. But this way it
    # is easy to test that the ssh path is used correctly.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost" + target_path,
            existing='replace')
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if a "compatible" datalad is installed
                # on the remote end. ATM we don't have an easy way to guarantee
                # that AFAIK (yoh), so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect the files expected to be touched (mtime changed) without any content change
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        if external_versions['cmd:system-git'] >= '2.4':
            # on elderly git we don't change the receive setting
            ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update({f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        assert_set_equal(modified_files, ok_modified_files)
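
Several of the expected entries above go through _path_, which these tests use to turn POSIX-style fragments into platform-native paths. A hedged sketch of such a helper (the actual DataLad utility may differ):

import os.path as op

def sketch_path_(*paths):
    """Join '/'-separated fragments with the platform's path separator."""
    return op.join(*(piece for path in paths for piece in path.split('/')))

# sketch_path_('.git/datalad/web') -> '.git\\datalad\\web' on Windows,
# '.git/datalad/web' elsewhere
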