Example #1
 def check_renamed_file(recursive, no_annex, path):
     ds = Dataset(path).create(no_annex=no_annex)
     create_tree(path, {'old': ''})
     ds.repo.add('old')
     ds.repo._git_custom_command(['old', 'new'], ['git', 'mv'])
     ds.save(recursive=recursive)
     assert_repo_status(path)
Example #2
def test_delete(client):
    client, ds = client
    with client as c:
        assert client.delete('/api/v1/file').status_code == 401
        assert c.get('/api/v1/auth').status_code == 200

        # missing path
        assert client.delete('/api/v1/file').status_code == 400

        testpath = 'subdir/dummy'
        file_content = '{"three": 3}'
        # resource picks up live changes to the dataset
        create_tree(ds.path, {'subdir': {'dummy': file_content}})
        ds.save()
        assert testpath in c.get('/api/v1/file').get_json()['files']

        rq = c.delete(
            '/api/v1/file',
            data=json.dumps(dict(
                path=testpath,
                verify_availability=False,
            )),
            content_type='application/json',
        ).get_json()
        if ds.config.obtain('datalad.repo.direct', False):
            # https://github.com/datalad/datalad/issues/2836
            return
        assert_result_count(rq, 1, action='remove', status='ok', path=testpath)
        assert testpath not in c.get('/api/v1/file').get_json()['files']
Example #3
def test_clone_into_dataset(source, top_path):

    ds = create(top_path)
    assert_repo_status(ds.path)

    subds = ds.clone(source,
                     "sub",
                     result_xfm='datasets',
                     return_type='item-or-list')
    ok_((subds.pathobj / '.git').is_dir())
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # sub is clean:
    assert_repo_status(subds.path, annex=None)
    # top is clean:
    assert_repo_status(ds.path, annex=None)

    # but we could also save while installing, and there should be no side effect
    # of saving any other changes if we choose not to auto-save them
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    assert_repo_status(ds.path, untracked=['dummy.txt'])
    subds_ = ds.clone(source,
                      "sub2",
                      result_xfm='datasets',
                      return_type='item-or-list')
    eq_(subds_.pathobj, ds.pathobj / "sub2")  # for paranoid yoh ;)
    assert_repo_status(ds.path, untracked=['dummy.txt'])
Example #4
def test_install_into_dataset(source, top_path):

    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is too:
    ok_clean_git(ds.path, annex=None)
    ds.save('addsub')
    # now it is:
    ok_clean_git(ds.path, annex=None)

    # but we could also save while installing, and there should be no side effect
    # of saving any other changes if we choose not to auto-save them
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.add('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])
Example #5
def test_clone_into_dataset(source, top_path):

    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.clone(source,
                     "sub",
                     result_xfm='datasets',
                     return_type='item-or-list')
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is clean:
    ok_clean_git(ds.path, annex=None)

    # but we could also save while installing, and there should be no side effect
    # of saving any other changes if we choose not to auto-save them
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.clone(source,
                      "sub2",
                      result_xfm='datasets',
                      return_type='item-or-list')
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])
Example #6
def test_publish_gh1691(origin, src_path, dst_path):

    # prepare src; no subdatasets installed, but mount points present
    source = install(src_path, source=origin, recursive=False)
    ok_(exists(opj(src_path, "subm 1")))
    assert_false(Dataset(opj(src_path, "subm 1")).is_installed())

    # some content modification of the superdataset
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)

    # create the target(s):
    source.create_sibling('ssh://localhost:' + dst_path,
                          name='target',
                          recursive=True)

    # publish recursively, which silently ignores non-installed datasets
    results = source.publish(to='target', recursive=True)
    assert_result_count(results, 1)
    assert_result_count(results,
                        1,
                        status='ok',
                        type='dataset',
                        path=source.path)

    # if, however, a non-installed subdataset is requested explicitly, it'll fail
    results = source.publish(path='subm 1', to='target', on_failure='ignore')
    assert_result_count(results,
                        1,
                        status='impossible',
                        type='dataset',
                        action='publish')
Example #7
def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(
        dspath,
        {
            "mike1": 'mike1',  # will be added from topdir
            "later": "later",  # later from within subdir
            "d": {
                "mike2": 'mike2',  # to be added within subdir
            }
        })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")

    # Let's also do it in a subdirectory as CWD, and check that relative paths
    # given to a plain command (not a dataset method) are treated as
    # relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path, message="committing", path="mike2")

        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)

    assert_repo_status(dspath)
Example #8
    def setup_class(cls):
        mktmp_kws = get_tempfile_kwargs()
        path = tempfile.mkdtemp(**mktmp_kws)
        create_tree(
            path, {
                "udir": {
                    x + ".dat" + ver: x + " content"
                    for x in "abcd" for ver in ["", ".v1"]
                }
            })

        cls._hpath = HTTPPath(path)
        cls._hpath.start()
        cls.url = cls._hpath.url

        cls.json_file = tempfile.mktemp(suffix=".json", **mktmp_kws)
        with open(cls.json_file, "w") as jfh:
            json.dump([{
                "url": cls.url + "udir/a.dat",
                "name": "a",
                "subdir": "foo"
            }, {
                "url": cls.url + "udir/b.dat",
                "name": "b",
                "subdir": "bar"
            }, {
                "url": cls.url + "udir/c.dat",
                "name": "c",
                "subdir": "foo"
            }], jfh)
Example #9
def test_publish_gh1691(origin, src_path, dst_path):

    # prepare src; no subdatasets installed, but mount points present
    source = install(src_path, source=origin, recursive=False)
    ok_(exists(opj(src_path, "subm 1")))
    assert_false(Dataset(opj(src_path, "subm 1")).is_installed())

    # some content modification of the superdataset
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)

    # create the target(s):
    source.create_sibling(
        'ssh://localhost:' + dst_path,
        name='target', recursive=True)

    # publish recursively, which silently ignores non-installed datasets
    results = source.publish(to='target', recursive=True)
    assert_result_count(results, 1)
    assert_result_count(results, 1, status='ok', type='dataset', path=source.path)

    # if, however, a non-installed subdataset is requested explicitly, it'll fail
    results = source.publish(path='subm 1', to='target', on_failure='ignore')
    assert_result_count(results, 1, status='impossible', type='dataset', action='publish')
Example #10
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {
            "new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {
            "new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
Example #11
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.add('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    ok_clean_git(sub.path)
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(path=sub.path))
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save -d .` saves the state change in the subdataset, but leaves any untracked
    # content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save())
    ok_clean_git(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.add('new2')
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    with chpwd(parent.path):
        assert_status(
            # notneeded to save sub, but need to save parent
            ['ok', 'notneeded'],
            # the key condition of this test is that no reference dataset is
            # given!
            save(path='sub', super_datasets=True))
    # save super must not cause untracked content to be committed!
    ok_clean_git(parent.path, untracked=['untracked'])
Example #12
def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True)
    # the key action is done
    assert_result_count(res,
                        1,
                        path=op.join(subsub.path, 'new'),
                        action='add',
                        status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)
Example #13
def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(opj(path, "origin"))
    dspath = opj(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2', # to be added within subdir
        }
    })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save("committing", path="./mike1")

    # Let's also do it in a subdirectory
    with chpwd(opj(dspath, 'd')):
        ds.repo.add("mike2", git=True)
        ds.save("committing", path="./mike2")

        later = opj(pardir, "later")
        ds.repo.add(later, git=True)
        ds.save("committing", path=later)

    ok_clean_git(dspath)
Example #14
def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(opj(path, "origin"))
    dspath = opj(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2', # to be added within subdir
        }
    })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds._save("committing", path="./mike1")

    # Let's also do it in a subdirectory
    with chpwd(opj(dspath, 'd')):
        ds.repo.add("mike2", git=True)
        ds._save("committing", path="./mike2")

        later = opj(pardir, "later")
        ds.repo.add(later, git=True)
        ds._save("committing", path=later)

    ok_clean_git(dspath)
Example #15
def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    ok_clean_git(parent.path)
    sub1 = parent.create(opj('down', 'sub1'))
    ok_clean_git(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    ok_clean_git(parent.path, index_modified=['sub2'])
    res = parent.save()
    ok_clean_git(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    ok_clean_git(parent.path, index_modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.add('.', recursive=True)
    # the key action is done
    assert_result_count(
        res, 1, path=opj(subsub.path, 'new'), action='add', status='ok')
    # sub1 is untouched, and not reported
    assert_result_count(res, 0, path=sub1.path)
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    ok_clean_git(parent.path)
Example #16
 def check_renamed_file(recursive, annex, path):
     ds = Dataset(path).create(annex=annex)
     create_tree(path, {'old': ''})
     ds.repo.add('old')
     ds.repo.call_git(["mv"], files=["old", "new"])
     ds.save(recursive=recursive)
     assert_repo_status(path)
Example #17
def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2', # to be added within subdir
        }
    })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")

    # Let's also do it in a subdirectory as CWD, and check that relative paths
    # given to a plain command (not a dataset method) are treated as
    # relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path,
             message="committing",
             path="mike2")

        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)

    assert_repo_status(dspath)
Example #18
 def check_renamed_file(recursive, no_annex, path):
     ds = Dataset(path).create(no_annex=no_annex)
     create_tree(path, {'old': ''})
     ds.add('old')
     ds.repo._git_custom_command(['old', 'new'], ['git', 'mv'])
     ds._save(recursive=recursive)
     ok_clean_git(path)
Example #19
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
Example #20
def make_studyforrest_mockup(path):
    """Generate a dataset structure mimicking aspects of studyforrest.org

    Under the given path there are two directories:

    public - to be published datasets
    private - never to be published datasets

    The 'public' directory itself is a superdataset; the 'private' directory
    is just a directory that contains standalone datasets in subdirectories.
    """
    public = create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.create('phase1',
                           description='old-style, no connection to RAW')
    structural = public.create('structural', description='anatomy')
    tnt = public.create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless='auto')
    tnt.clone(source=structural.path, path=opj('src', 'structural'), reckless='auto')
    aligned = public.create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path, path=opj('src', 'phase1'), reckless='auto')
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless='auto')
    # new acquisition
    labet = create(opj(path, 'private', 'labet'), description="raw data ET")
    phase2_dicoms = create(opj(path, 'private', 'p2dicoms'), description="raw data P2MRI")
    phase2 = public.create('phase2',
                           description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless='auto')
    phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'), reckless='auto')
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless='auto')
    aligned.clone(source=phase2.path, path=opj('src', 'phase2'), reckless='auto')
    # never to be published media files
    media = create(opj(path, 'private', 'media'), description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.create('annotations', description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'), reckless='auto')
    # a few typical analysis datasets
    # (just doing 3, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.create('metaanalysis', description="analysis of analyses")
    for i in range(1, 3):
        ana = public.create('analysis{}'.format(i),
                            description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'), reckless='auto')
        ana.clone(source=aligned.path, path=opj('src', 'aligned'), reckless='auto')
        ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless='auto')
        # link to metaanalysis
        metaanalysis.clone(source=ana.path, path=opj('src', 'ana{}'.format(i)),
                           reckless='auto')
        # simulate change in an input (but not raw) dataset
        create_tree(
            aligned.path,
            {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.save()
    # finally aggregate data
    aggregate = public.create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path, path=opj('src', 'aligned'), reckless='auto')
Example #21
def make_studyforrest_mockup(path):
    """Generate a dataset structure mimicking aspects of studyforrest.org

    Under the given path there are two directories:

    public - to be published datasets
    private - never to be published datasets

    The 'public' directory itself is a superdataset; the 'private' directory
    is just a directory that contains standalone datasets in subdirectories.
    """
    public = create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.create('phase1',
                           description='old-style, no connection to RAW')
    structural = public.create('structural', description='anatomy')
    tnt = public.create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
    tnt.clone(source=structural.path, path=opj('src', 'structural'), reckless=True)
    aligned = public.create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
    # new acquisition
    labet = create(opj(path, 'private', 'labet'), description="raw data ET")
    phase2_dicoms = create(opj(path, 'private', 'p2dicoms'), description="raw data P2MRI")
    phase2 = public.create('phase2',
                           description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless=True)
    phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'), reckless=True)
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
    aligned.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
    # never to be published media files
    media = create(opj(path, 'private', 'media'), description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.create('annotations', description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'), reckless=True)
    # a few typical analysis datasets
    # (just doing 3, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.create('metaanalysis', description="analysis of analyses")
    for i in range(1, 3):
        ana = public.create('analysis{}'.format(i),
                            description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'), reckless=True)
        ana.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True)
        ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
        # link to metaanalysis
        metaanalysis.clone(source=ana.path, path=opj('src', 'ana{}'.format(i)),
                           reckless=True)
        # simulate change in an input (but not raw) dataset
        create_tree(
            aligned.path,
            {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.save()
    # finally aggregate data
    aggregate = public.create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True)
Example #22
def _test_target_ssh_inherit(standardgroup, src_path, target_path):
    ds = Dataset(src_path).create()
    target_url = 'localhost:%s' % target_path
    remote = "magical"
    # for the test of setting a group, we will just smoke test using the
    # current user's group
    ds.create_sibling(target_url,
                      name=remote,
                      shared='group',
                      group=os.getgid())  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset
    subds = ds.create('sub')  # so now we got a hierarchy!
    create_tree(subds.path, {'sub.dat': 'lots of data'})
    subds.add('sub.dat')
    ok_file_under_git(subds.path, 'sub.dat', annexed=True)

    target_sub = Dataset(opj(target_path, 'sub'))
    # since we do not yet have (and thus have not used) an option to record
    # publishing to that sibling by default (e.g. --set-upstream), running
    # just ds.publish should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message='No target sibling configured for default publication,'
                ' please specific via --to')
    ds.publish(
        to=remote)  # should be ok, non recursive; BUT it (git or us?) would
    # create an empty sub/ directory
    ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 2)
    assert_status(('error', 'notneeded'), res)
    assert_result_count(res,
                        1,
                        status='error',
                        message=("Unknown target sibling '%s' for publication",
                                 'magical'))
    ds.publish(to=remote, recursive=True, missing='inherit')
    # we added the remote and set all the preferred content settings
    eq_(subds.repo.get_preferred_content('wanted', remote),
        'standard' if standardgroup else '')
    eq_(subds.repo.get_preferred_content('group', remote), standardgroup or '')

    ok_(target_sub.is_installed())  # it is there now
    eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
    # and we have transferred the content
    if standardgroup and standardgroup == 'backup':
        # only then content should be copied
        ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
    else:
        # otherwise nothing is copied by default
        assert_false(target_sub.repo.file_has_content('sub.dat'))
Example #23
def test_invalid_call(path):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetArgumentFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))
Example #24
def test_add_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.add("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x", "msg": u"add β"})
    ds.add("foo", message_file=opj(ds.path, "msg"))
    assert_equal(ds.repo.format_commit("%s"), u"add β")
Example #25
def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge; with only one sibling and no parameters this should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793); later it might be
    # enhanced to a graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/' + DEFAULT_BRANCH],
        ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
Example #26
def test_save_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.save("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x", "msg": "add foo"})
    ds.repo.add("foo")
    ds.save(message_file=op.join(ds.path, "msg"))
    eq_(ds.repo.repo.git.show("--format=%s", "--no-patch"), "add foo")
Example #27
def test_preserve_attrs(src, dest):
    create_tree(src, {"src": {"foo": {"bar": "This is test text."}}})
    os.utime(opj(src, "src", "foo", "bar"), (1234567890, 1234567890))
    _RunnerAdapter().put(opj(src, "src"), dest, recursive=True, preserve_attrs=True)
    s = os.stat(opj(dest, "src", "foo", "bar"))
    assert s.st_atime == 1234567890
    assert s.st_mtime == 1234567890
    with open(opj(dest, "src", "foo", "bar")) as fp:
        assert fp.read() == "This is test text."
Example #28
def test_save_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(IncompleteResultsError):
        ds.save("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x", "msg": "add foo"})
    ds.add("foo", save=False)
    ds.save(message_file=opj(ds.path, "msg"))
    assert_equal(ds.repo.repo.git.show("--format=%s", "--no-patch"), "add foo")
Example #29
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)[0]
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt", dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    dest.remove('subm 1')
    dest.update(recursive=True)
    dest.update(merge=True, recursive=True)

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.save(message="saving changes within subm2",
                recursive=True,
                all_changes=True)
    dest.update(merge=True, recursive=True)
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')
Example #30
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)[0]
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    dest.remove('subm 1')
    dest.update(recursive=True)
    dest.update(merge=True, recursive=True)

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.save(message="saving changes within subm2",
                recursive=True, all_changes=True)
    dest.update(merge=True, recursive=True)
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')
Example #31
def test_read(client):
    client, ds = client
    with client as c:
        assert c.get('/api/v1/auth').status_code == 200
        existing_files = c.get('/api/v1/file').get_json()['files']

        file_content = '{"three": 3}'
        # resource picks up live changes to the dataset
        create_tree(ds.path, {'subdir': {'dummy': file_content}})
        ds.save()
        current_files = c.get('/api/v1/file').get_json()['files']
        testpath = 'subdir/dummy'
        assert testpath not in existing_files
        assert testpath in current_files

        # simple path filtering
        assert c.get('/api/v1/file/*dummy').get_json()['files'] == [testpath]

        # request file content in various supported ways
        for a, kwa in (
                # plain URL routing
                (('/api/v1/file/subdir/dummy',), {}),
                # URL arg
                (('/api/v1/file?path=subdir%2Fdummy',), {}),
                # form data
                (('/api/v1/file',), {'data': {'path': testpath}}),
                (('/api/v1/file',), {
                    'data': json.dumps(dict(path=testpath)),
                    'content_type': 'application/json',
                }),
        ):
            rq = c.get(*a, **kwa)
            assert rq.status_code == 200
            assert rq.get_json()['path'] == testpath
            assert rq.get_json()['content'] == file_content

        for j, target in (
                ('no', file_content),
                # JSON decoding
                ('yes', {'three': 3}),
                # JSON stream decoding
                ('stream', [{'three': 3}]),
        ):
            assert c.get(
                '/api/v1/file',
                data=json.dumps(dict(path=testpath, json=j)),
                content_type='application/json',
            ).get_json()['content'] == target
Example #32
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {
        "dirt_untracked": "untracked",
        "dirt_modified": "modified"
    })
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status(
        "impossible",
        ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
               inputs=["test-annex.dat"],
               on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"],
           outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path,
                       modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
Example #33
 def get_baseline(p):
     ds = Dataset(p).create()
     with chpwd(ds.path):
         subds = create('sub')
     ds.add('sub', save=False)
     create_tree(subds.path, {"staged": ""})
     subds.add("staged", save=False)
     # subdataset has staged changes.
     ok_clean_git(ds.path, index_modified=['sub'])
     return ds
Example #34
def test_save_partial_index(path):
    ds = Dataset(path).create(force=True)
    ds.add("foo")
    ok_clean_git(ds.path)
    ds.unlock(path="foo")
    create_tree(ds.path, tree={"foo": "a", "staged": ""},
                remove_existing=True)
    ds.repo.add("staged", git=True)
    ds.save(path="foo")
    ok_clean_git(ds.path, head_modified=["staged"])
Example #35
def test_no_interaction_with_untracked_content(path):
    # extracted from what was a metadata test originally
    ds = Dataset(opj(path, 'origin')).create(force=True)
    create_tree(ds.path, {'sub': {'subsub': {'dat': 'lots of data'}}})
    subds = ds.create('sub', force=True)
    subds.remove(opj('.datalad', 'config'), if_dirty='ignore')
    ok_(not exists(opj(subds.path, '.datalad', 'config')))
    # this will only work if `remove` didn't do anything stupid and
    # cause all content to be saved
    subds.create('subsub', force=True)
Example #36
def test_add_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.add("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x",
                       "msg": u"add β"})
    ds.add("foo", message_file=opj(ds.path, "msg"))
    assert_equal(ds.repo.format_commit("%s"),
                 u"add β")
Example #37
def test_save_partial_index(path):
    ds = Dataset(path).create(force=True)
    ds.add("foo")
    ok_clean_git(ds.path)
    ds.unlock(path="foo")
    create_tree(ds.path, tree={"foo": "a", "staged": ""},
                remove_existing=True)
    ds.repo.add("staged", git=True)
    ds._save(path="foo")
    ok_clean_git(ds.path, head_modified=["staged"])
Example #38
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME, the HOME on the remote end would be
    # different even though it is localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that the hook runs and there is nothing in stderr,
    # since it exits with exit code 0 even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out IncompleteResultsError exception ATM... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test, since in the above test it would not puke ATM but just
    # would not even try to copy, since it assumes that the file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    published3 = ds.publish(to='localhost',
                            transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)
Example #39
def test_external_versions_rogue_module(topd):
    ev = ExternalVersions()
    # if a module throws some other non-ImportError exception upon import,
    # we must not crash, but issue a warning
    modname = 'verycustomrogue__'
    create_tree(topd, {modname + '.py': 'raise Exception("pickaboo")'})
    with patch('sys.path', [topd]), \
        swallow_logs(new_level=logging.WARNING) as cml:
        assert ev[modname] is None
        assert_true(ev.dumps(indent=True).endswith(linesep))
        assert_in('pickaboo', cml.out)
Example #40
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME, the HOME on the remote end would be
    # different even though it is localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.save('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that the hook runs and there is nothing in stderr,
    # since it exits with exit code 0 even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out IncompleteResultsError exception ATM... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test, since in the above test it would not puke ATM but just
    # would not even try to copy, since it assumes that the file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.save('sub2.dat')
    published3 = ds.publish(to='localhost', transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)
Example #41
def test_save_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.save("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x",
                       "msg": "add foo"})
    ds.repo.add("foo")
    ds.save(message_file=op.join(ds.path, "msg"))
    eq_(ds.repo.repo.git.show("--format=%s", "--no-patch"),
        "add foo")
Example #42
def test_save_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.save("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x", "msg": "add foo"})
    ds.repo.add("foo")
    ds.save(message_file=op.join(ds.path, "msg"))
    # ATTN: Use master explicitly so that this check works when we're on an
    # adjusted branch too (e.g., when this test is executed under Windows).
    eq_(ds.repo.format_commit("%s", "master"), "add foo")
Example #43
def test_update_git_smoke(src_path, dst_path):
    # Apparently this was just failing on git repos for basic lack of coverage,
    # hence this quick test
    ds = Dataset(src_path).create(no_annex=True)
    target = install(
        dst_path, source=src_path,
        result_xfm='datasets', return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(
        target.update(recursive=True, merge=True), 1,
        status='ok', type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')
Example #44
def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge; with only one sibling and no parameters this should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), ['master'])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin._git_custom_command([], ['git', 'config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin._git_custom_command([], ['git', 'branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793); later it might be
    # enhanced to a graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/master'], ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
Example #45
def _test_target_ssh_inherit(standardgroup, src_path, target_path):
    ds = Dataset(src_path).create()
    target_url = 'localhost:%s' % target_path
    remote = "magical"
    # for the test of setting a group, we will just smoke test using the
    # current user's group
    ds.create_sibling(target_url, name=remote, shared='group', group=os.getgid())  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset
    subds = ds.create('sub')  # so now we got a hierarchy!
    create_tree(subds.path, {'sub.dat': 'lots of data'})
    subds.add('sub.dat')
    ok_file_under_git(subds.path, 'sub.dat', annexed=True)

    target_sub = Dataset(opj(target_path, 'sub'))
    # since we do not yet have (and thus have not used) an option to record
    # publishing to that sibling by default (e.g. --set-upstream), running
    # just ds.publish should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message='No target sibling configured for default publication, please specific via --to')
    ds.publish(to=remote)  # should be ok, non recursive; BUT it (git or us?)
                           # would create an empty sub/ directory
    ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 2)
    assert_status(('error', 'notneeded'), res)
    assert_result_count(
        res, 1,
        status='error',
        message=("Unknown target sibling '%s' for publication", 'magical'))
    ds.publish(to=remote, recursive=True, missing='inherit')
    # we added the remote and set all the preferred content settings
    eq_(subds.repo.get_preferred_content('wanted', remote), 'standard' if standardgroup else '')
    eq_(subds.repo.get_preferred_content('group', remote), standardgroup or '')

    ok_(target_sub.is_installed())  # it is there now
    eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
    # and we have transferred the content
    if standardgroup and standardgroup == 'backup':
        # only then content should be copied
        ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
    else:
        # otherwise nothing is copied by default
        assert_false(target_sub.repo.file_has_content('sub.dat'))
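The leading underscore marks the function above as a parametrized helper rather
than a test. Hypothetical wrappers in the style of datalad's test suite (the
names and the stacked with_tempfile decorators are assumptions) would drive it
with and without a standard annex group:

from datalad.tests.utils import with_tempfile

@with_tempfile(mkdir=True)
@with_tempfile(mkdir=True)
def test_target_ssh_inherit_no_group(src_path, target_path):
    _test_target_ssh_inherit(None, src_path, target_path)

@with_tempfile(mkdir=True)
@with_tempfile(mkdir=True)
def test_target_ssh_inherit_backup(src_path, target_path):
    _test_target_ssh_inherit('backup', src_path, target_path)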
Example #49
0
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with a dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"], untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
Example #50
0
    def test_override_existing_under_git(self):
        create_tree(self.annex.path, {'1.dat': 'load2'})
        self.annex.add('1.dat', git=True)
        self.annex.commit('added to git')
        add_archive_content(
            '1.tar', annex=self.annex, strip_leading_dirs=True,
        )
        # and we did not bother adding it to annex (for now) -- just skipped,
        # since we already have it and it is the same
        ok_file_under_git(self.annex.path, '1.dat', annexed=False)

        # but if we say 'overwrite' -- we would remove and replace
        add_archive_content(
            '1.tar', annex=self.annex, strip_leading_dirs=True,
            delete=True, existing='overwrite'
        )
        ok_file_under_git(self.annex.path, '1.dat', annexed=True)
Example #51
0
def test_diff_recursive(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1, action='diff', state='added', path=sub.path, type='dataset')
    # now recursive
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(res, 1, action='diff', state='added', path=opj(sub.path, '.datalad', 'config'))

    # now we add a file each to the parent and the subdataset
    create_tree(ds.path, {'onefile': 'tobeadded', 'sub': {'twofile': 'tobeadded'}})
    res = ds.diff(recursive=True, report_untracked='all')
    assert_result_count(res, 3)
    assert_result_count(res, 1, action='diff', state='untracked', path=opj(ds.path, 'onefile'), type='file')
    assert_result_count(res, 1, action='diff', state='modified', path=sub.path, type='dataset')
    assert_result_count(res, 1, action='diff', state='untracked', path=opj(sub.path, 'twofile'), type='file')
    # save sub
    sub.add('.')
    # save sub in parent
    ds.save()
    # save addition in parent
    ds.add('.')
    ok_clean_git(ds.path)
    # look at the last change, only one file was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    assert_result_count(res, 1, action='diff', state='added', path=opj(ds.path, 'onefile'), type='file')

    # now the exact same thing with recursion; it must not differ from the
    # call above
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    # last change in parent
    assert_result_count(res, 1, action='diff', state='added', path=opj(ds.path, 'onefile'), type='file')

    # one further back brings in the modified subdataset, and the added file within it
    res = ds.diff(recursive=True, revision='HEAD~2..HEAD')
    assert_result_count(res, 3)
    assert_result_count(res, 1, action='diff', state='added', path=opj(ds.path, 'onefile'), type='file')
    assert_result_count(res, 1, action='diff', state='added', path=opj(sub.path, 'twofile'), type='file')
    assert_result_count(res, 1, action='diff', state='modified', path=sub.path, type='dataset')
Example #52
0
    def test_add_delete_after_and_drop_subdir(self):
        os.mkdir(opj(self.annex.path, 'subdir'))
        mv_out = self.annex._git_custom_command(
            [],
            ['git', 'mv', '1.tar', 'subdir']
        )
        self.annex.commit("moved into subdir")
        with chpwd(self.annex.path):
            # was failing since deletion did not consider whether the tarball
            # had been extracted into that tarball's directory
            commits_prior_master = list(self.annex.get_branch_commits())
            commits_prior = list(self.annex.get_branch_commits('git-annex'))
            add_out = add_archive_content(
                opj('subdir', '1.tar'),
                delete_after=True,
                drop_after=True)
            ok_clean_git(self.annex.path)
            commits_after_master = list(self.annex.get_branch_commits())
            commits_after = list(self.annex.get_branch_commits('git-annex'))
            # There should be a single commit for all additions, +1 to
            # initialize the datalad-archives special remote (gh-1258).  If
            # faking dates, there should be another +1 because
            # annex.alwayscommit isn't set to false.
            assert_equal(len(commits_after),
                         len(commits_prior) + 2 + self.annex.fake_dates_enabled)
            assert_equal(len(commits_after_master), len(commits_prior_master))
            assert(add_out is self.annex)
            # there should be no .datalad temporary files hanging around
            self.assert_no_trash_left_behind()

            # and if we add some untracked file and redo, there should be no
            # changes to master and the file should remain uncommitted
            create_tree(self.annex.path, {'dummy.txt': '123'})
            assert_true(self.annex.dirty)  # untracked file
            add_out = add_archive_content(
                opj('subdir', '1.tar'),
                delete_after=True,
                drop_after=True,
                allow_dirty=True)
            ok_clean_git(self.annex.path, untracked=['dummy.txt'])
            assert_equal(len(list(self.annex.get_branch_commits())),
                         len(commits_prior_master))

            # there should be no .datalad temporary files hanging around
            self.assert_no_trash_left_behind()
Example #53
0
def test_save_to_git(path):
    ds = Dataset(path).create()
    create_tree(
        ds.path,
        {
            'file_ingit': 'file_ingit',
            'file_inannex': 'file_inannex',
        }
    )
    ds.repo.save(paths=['file_ingit'], git=True)
    ds.repo.save(paths=['file_inannex'])
    assert_repo_status(ds.repo)
    for f, p in iteritems(ds.repo.annexstatus()):
        eq_(p['state'], 'clean')
        if f.match('*ingit'):
            assert_not_in('key', p, f)
        elif f.match('*inannex'):
            assert_in('key', p, f)
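The f.match() calls above work because the keys of annexstatus() are pathlib
path objects; a tiny standalone illustration of that idiom (the concrete path
is made up):

from pathlib import Path

p = Path('/tmp/ds/file_ingit')
assert p.match('*ingit')        # glob-style match against the final component
assert not p.match('*inannex')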
Example #54
0
def test_get_invalid_call(path, file_outside):

    # no argument at all:
    assert_raises(InsufficientArgumentsError, get, None)
    assert_raises(InsufficientArgumentsError, get, [])
    # invalid dataset:
    assert_raises(ValueError, get, None, dataset=path)

    # have a plain git:
    ds = Dataset(path)
    ds.create(no_annex=True)
    with open(opj(path, "some.txt"), "w") as f:
        f.write("whatever")
    ds.add("some.txt", to_git=True)
    ds.save("Initial commit.")

    # make it an annex:
    AnnexRepo(path, init=True, create=True)
    # call get again on a file in git:
    result = ds.get("some.txt")
    # skipped silently, but no result for this file:
    eq_(len(result), 0)

    # invalid source:
    # yoh:  but now we would need to add it to annex since clever code first
    # checks what needs to be fetched at all
    create_tree(path, {'annexed.dat': 'some'})
    ds.add("annexed.dat")
    ds.repo.drop("annexed.dat", options=['--force'])
    with assert_raises(RemoteNotAvailableError) as ce:
        ds.get("annexed.dat", source='MysteriousRemote')
    eq_("MysteriousRemote", ce.exception.remote)

    # warning on a non-existing file:
    with swallow_logs(new_level=logging.WARNING) as cml:
        with assert_raises(IncompleteResultsError) as cme:
            ds.get("NotExistingFile.txt")
        result = cme.exception.results
        eq_(len(result), 0)
        assert_in("ignored non-existing paths", cml.out)

    # path outside repo errors as with most other commands:
    assert_raises(ValueError, ds.get, file_outside)
Example #55
0
def test_no_annex(path):
    ds = create(path)
    ok_clean_git(ds.path)
    create_tree(
        ds.path,
        {'code': {
            'inannex': 'content',
            'notinannex': 'othercontent'},
         'README': 'please'})
    # add inannex prior to the configuration
    ds.save(opj('code', 'inannex'))
    no_annex(pattern=['code/**', 'README'], dataset=ds)
    # add notinannex and README after the configuration
    ds.save([opj('code', 'notinannex'), 'README'])
    ok_clean_git(ds.path)
    # one is annexed, the other is not, despite no change in the add call;
    # importantly, .gitattributes is also not annexed
    eq_([opj('code', 'inannex')],
        ds.repo.get_annexed_files())
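Under the hood, no_annex boils down to git-annex's largefiles mechanism; a
hedged sketch of configuring the same routing by hand (annex.largefiles=nothing
in .gitattributes sends matching paths straight to git; the README content here
is illustrative):

import tempfile

from datalad.api import Dataset

ds = Dataset(tempfile.mkdtemp()).create()
# append to the .gitattributes that create() seeded
with (ds.pathobj / '.gitattributes').open('a') as f:
    f.write('README annex.largefiles=nothing\n')
(ds.pathobj / 'README').write_text(u'please')
ds.save()
assert 'README' not in ds.repo.get_annexed_files()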
Example #56
0
    def setup_class(cls):
        mktmp_kws = get_tempfile_kwargs()
        path = tempfile.mkdtemp(**mktmp_kws)
        create_tree(path,
                    {"udir": {x + ".dat" + ver: x + " content"
                              for x in "abcd"
                              for ver in ["", ".v1"]}})

        cls._hpath = HTTPPath(path)
        cls._hpath.start()
        cls.url = cls._hpath.url

        cls.json_file = tempfile.mktemp(suffix=".json", **mktmp_kws)
        with open(cls.json_file, "w") as jfh:
            json.dump(
                [{"url": cls.url + "udir/a.dat", "name": "a", "subdir": "foo"},
                 {"url": cls.url + "udir/b.dat", "name": "b", "subdir": "bar"},
                 {"url": cls.url + "udir/c.dat", "name": "c", "subdir": "foo"}],
                jfh)
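A setup like this would normally be paired with a teardown that stops the
server and removes the temporary files; a minimal hedged sketch (HTTPPath.stop()
and the .path attribute are assumed to be the counterparts of the calls above):

    @classmethod
    def teardown_class(cls):
        import os
        from datalad.utils import rmtree
        cls._hpath.stop()          # shut down the test HTTP server
        rmtree(cls._hpath.path)    # remove the served temporary tree
        os.unlink(cls.json_file)   # remove the temporary JSON spec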
Example #57
0
def test_failon_nodrop(path):
    # test to make sure that we do not wipe out data when checks are enabled,
    # regardless of the general error behavior mode
    ds = Dataset(path).create()
    # we play with a subdataset to bypass the tests that prevent the removal
    # of top-level datasets
    sub = ds.create('sub')
    create_tree(sub.path, {'test': 'content'})
    ds.save(opj('sub', 'test'))
    ok_clean_git(ds.path)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))
    # we put one file into the dataset's annex, no redundant copies
    # neither uninstall nor remove should work
    res = ds.uninstall('sub', check=True, on_failure='ignore')
    assert_status(['error', 'impossible'], res)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))
    # same with remove
    res = ds.remove('sub', check=True, on_failure='ignore')
    assert_status(['error', 'impossible'], res)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))
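The intended escape hatch from this protection is to waive the checks
explicitly; a short hedged continuation using the same objects (check=False
mirrors the check=True calls above, and irreversibly discards the content):

    # explicitly disable the availability checks; the data in 'sub' is lost
    res = ds.remove('sub', check=False)
    assert_status('ok', res)
    ok_(not sub.is_installed())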
Example #58
0
def test_get_tags(path):
    from mock import patch

    gr = GitRepo(path, create=True)
    eq_(gr.get_tags(), [])
    eq_(gr.describe(), None)

    # Explicitly override the committer date because tests may set it to a
    # fixed value, but we want to check that the returned tags are sorted by
    # the committer date.
    with patch.dict("os.environ", {"GIT_COMMITTER_DATE":
                                   "Thu, 07 Apr 2005 22:13:13 +0200"}):
        create_tree(gr.path, {'file': ""})
        gr.add('file')
        gr.commit(msg="msg")
        eq_(gr.get_tags(), [])
        eq_(gr.describe(), None)

        gr.tag("nonannotated")
        tags1 = [{'name': 'nonannotated', 'hexsha': gr.get_hexsha()}]
        eq_(gr.get_tags(), tags1)
        eq_(gr.describe(), None)
        eq_(gr.describe(tags=True), tags1[0]['name'])

    first_commit = gr.get_hexsha()

    with patch.dict("os.environ", {"GIT_COMMITTER_DATE":
                                   "Fri, 08 Apr 2005 22:13:13 +0200"}):

        create_tree(gr.path, {'file': "123"})
        gr.add('file')
        gr.commit(msg="changed")

    gr.tag("annotated", message="annotation")
    tags2 = tags1 + [{'name': 'annotated', 'hexsha': gr.get_hexsha()}]
    eq_(gr.get_tags(), tags2)
    eq_(gr.describe(), tags2[1]['name'])

    # compare against the previous commit
    eq_(gr.describe(commitish=first_commit), None)
    eq_(gr.describe(commitish=first_commit, tags=True), tags1[0]['name'])
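As seen earlier in this collection, get_tags() can also reduce each record to a
single field; a hedged follow-on check (assuming output='name' extracts the name
from each tag record, with tags still sorted by committer date):

    eq_(gr.get_tags(output='name'), ['nonannotated', 'annotated'])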
Example #59
0
def test_get_invalid_call(path, file_outside):

    # no argument at all:
    assert_raises(InsufficientArgumentsError, get, None)
    assert_raises(InsufficientArgumentsError, get, [])
    # invalid dataset:
    assert_status('impossible', get(None, dataset=path, on_failure='ignore'))

    # have a plain git:
    ds = Dataset(path)
    ds.create(no_annex=True)
    with open(opj(path, "some.txt"), "w") as f:
        f.write("whatever")
    ds.save("some.txt", to_git=True, message="Initial commit.")

    # make it an annex (remove the indicator file that create() placed
    # in the dataset to make this possible):
    (ds.pathobj / '.noannex').unlink()
    AnnexRepo(path, init=True, create=True)
    # call get again on a file in git:
    result = ds.get("some.txt")
    assert_status('notneeded', result)

    # invalid source:
    # yoh:  but now we would need to add it to annex since clever code first
    # checks what needs to be fetched at all
    create_tree(path, {'annexed.dat': 'some'})
    ds.save("annexed.dat")
    ds.repo.drop("annexed.dat", options=['--force'])
    with assert_raises(RemoteNotAvailableError) as ce:
        ds.get("annexed.dat", source='MysteriousRemote')
    eq_("MysteriousRemote", ce.exception.remote)

    res = ds.get("NotExistingFile.txt", on_failure='ignore')
    assert_status('impossible', res)
    assert_message("path does not exist", res)

    # path outside repo errors as with most other commands:
    res = ds.get(file_outside, on_failure='ignore')
    assert_in_results(
        res, status='impossible', message='path not associated with any dataset')