Example 1
def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
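The pattern above condenses to a short recipe: saving inside a nested dataset does not register it anywhere, while saving its path from the superdataset adds it as a subdataset. A minimal sketch, assuming a recent datalad and hypothetical /tmp paths:

# Sketch only, not part of the test suite above; paths are hypothetical.
import os.path as op
from datalad.api import Dataset, create, save

super_ds = create('/tmp/super')
nested = create(op.join(super_ds.path, 'nested'))    # created, but not registered

save(dataset=nested.path)    # saves within 'nested' only
assert 'nested' not in super_ds.subdatasets(result_xfm='relpaths')

super_ds.save(nested.path)   # registers 'nested' as a subdataset of super_ds
assert 'nested' in super_ds.subdatasets(result_xfm='relpaths')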
Example 2
def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2', # to be added within subdir
        }
    })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")

    # Also run with a subdirectory as CWD, to check that relative paths
    # given to a plain command (not a dataset method) are treated as
    # relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path,
             message="committing",
             path="mike2")

        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)

    assert_repo_status(dspath)
Example 3
def test_subsuperdataset_save(path):
    # Verify that, when invoked without recursion, save does not
    # query the subdatasets of a subdataset
    # see https://github.com/datalad/datalad/issues/4523
    parent = Dataset(path).create()
    # Create 3 levels of subdatasets so we can later check operation
    # with or without --dataset being specified
    sub1 = parent.create('sub1')
    sub2 = parent.create(sub1.pathobj / 'sub2')
    sub3 = parent.create(sub2.pathobj / 'sub3')
    assert_repo_status(path)
    # now we will lobotomize sub3 so git would fail if any query is performed.
    rmtree(str(sub3.pathobj / '.git' / 'objects'))
    # the call should proceed fine since neither should care about sub3
    # default is no recursion
    parent.save('sub1')
    sub1.save('sub2')
    assert_raises(CommandError, parent.save, 'sub1', recursive=True)
    # and should fail if we request saving while in the parent directory
    # but while not providing a dataset, since operation would run within
    # pointed subdataset
    with chpwd(sub1.path):
        assert_raises(CommandError, save, 'sub2')
    # but should not fail in the top level superdataset
    with chpwd(parent.path):
        save('sub1')
Example 4
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.add('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    ok_clean_git(sub.path)
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(path=sub.path))
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save -d .` saves the state change in the subdataset, but leaves any untracked
    # content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save())
    ok_clean_git(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.add('new2')
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    with chpwd(parent.path):
        assert_status(
            # notneeded to save sub, but need to save parent
            ['ok', 'notneeded'],
            # the key condition of this test is that no reference dataset is
            # given!
            save(path='sub', super_datasets=True))
    # save super must not cause untracked content to be committed!
    ok_clean_git(parent.path, untracked=['untracked'])
Example 5
def test_gh2043p1(path):
    # this test documents the interim agreement on what should happen
    # in the case documented in gh-2043
    ds = Dataset(path).create(force=True)
    ds.save('1')
    assert_repo_status(ds.path, untracked=['2', '3'])
    ds.unlock('1')
    assert_repo_status(
        ds.path,
        # on windows we are in an unlocked branch by default, hence
        # we would see no change
        modified=[] if on_windows else ['1'],
        untracked=['2', '3'])
    # save(path='.') should recommit the unlocked file, and not touch anything else
    # this tests the second issue in #2043
    with chpwd(path):
        # only save modified bits
        save(path='.', updated=True)
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    assert_repo_status(ds.path, untracked=['2', '3'])
    with chpwd(path):
        # but when a path is given, anything that matches this path
        # untracked or not is added/saved
        save(path='.')
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    assert_repo_status(ds.path)
Example 6
def test_subsuperdataset_save(path):
    # Verify that, when invoked without recursion, save does not
    # query the subdatasets of a subdataset
    # see https://github.com/datalad/datalad/issues/4523
    parent = Dataset(path).create()
    # Create 3 levels of subdatasets so we can later check operation
    # with or without --dataset being specified
    sub1 = parent.create('sub1')
    sub2 = parent.create(sub1.pathobj / 'sub2')
    sub3 = parent.create(sub2.pathobj / 'sub3')
    assert_repo_status(path)
    # now we will lobotomize that sub3 so git would fail if any query is performed.
    (sub3.pathobj / '.git' / 'config').chmod(0o000)
    try:
        sub3.repo.call_git(['ls-files'], read_only=True)
        raise SkipTest
    except CommandError:
        # desired outcome
        pass
    # the call should proceed fine since neither should care about sub3
    # default is no recursion
    parent.save('sub1')
    sub1.save('sub2')
    assert_raises(CommandError, parent.save, 'sub1', recursive=True)
    # and should not fail in the top level superdataset
    with chpwd(parent.path):
        save('sub1')
    # or in a subdataset above the problematic one
    with chpwd(sub1.path):
        save('sub2')
Example 7
def test_relpath_add(path):
    ds = Dataset(path).create(force=True)
    with chpwd(op.join(path, 'dir')):
        eq_(save('testindir')[0]['path'], op.join(ds.path, 'dir', 'testindir'))
        # and now add all
        save('..')
    # auto-save enabled
    assert_repo_status(ds.path)
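The behavior relied on here — a plain save() without a dataset argument resolves relative paths against the current working directory and operates on the dataset containing it — reduces to a minimal sketch (the /tmp path is hypothetical):

# Sketch only; the /tmp path is hypothetical.
import os
from pathlib import Path
from datalad.api import Dataset, save

ds = Dataset('/tmp/demo').create()
subdir = os.path.join(ds.path, 'dir')
os.makedirs(subdir)
Path(subdir, 'testindir').write_text('content')

os.chdir(subdir)
save('testindir')   # resolved against CWD, saved into the enclosing dataset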
Example 8
def make_pretend_repo(dirname):
    os.chdir(dirname)
    datalad.create(str(dirname), force=True)
    rev_log = datalad.save("add data", str(dirname))
    (dirname / "useless.txt").write_text("who me.")
    datalad.save("add superfluous change", str(dirname))
    (dirname / "useless.txt").unlink()
    datalad.save("make things better", str(dirname))
Example 9
def test_path_arg_call(path):
    ds = create(path)
    for testfile in (ds.pathobj / 'abs.txt', ds.pathobj / 'rel.txt'):
        testfile.write_text(u'123')
        # we used to resolve relative paths against a dataset just given by
        # a path, but we no longer do that
        #save(dataset=ds.path, path=[testfile.name], to_git=True)
        save(dataset=ds, path=[testfile.name], to_git=True)
Example 10
def test_bf2043p2(path):
    ds = Dataset(path).create(force=True)
    ds.repo.add('staged')
    assert_repo_status(ds.path, added=['staged'], untracked=['untracked'])
    # save -u does not commit untracked content
    # this tests the second issue in #2043
    with chpwd(path):
        save(updated=True)
    assert_repo_status(ds.path, untracked=['untracked'])
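The distinction under test — save(updated=True), i.e. the -u flag, commits only content git already knows about, while untracked files need an explicit path — in a minimal sketch (the /tmp path is hypothetical):

# Sketch of the -u/--updated semantics; the /tmp path is hypothetical.
from pathlib import Path
from datalad.api import Dataset

ds = Dataset('/tmp/demo').create()
Path(ds.path, 'tracked').write_text('t')
Path(ds.path, 'untracked').write_text('u')
ds.repo.add('tracked')        # stage one file; the other stays untracked

ds.save(updated=True)         # commits the staged change only
ds.save(path='untracked')     # an explicit path picks up untracked content too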
Example 11
def test_bf2043p2(path):
    ds = Dataset(path).create(force=True)
    ds.add('staged', save=False)
    ok_clean_git(ds.path, head_modified=['staged'], untracked=['untracked'])
    # plain save does not commit untracked content
    # this tests the second issue in #2043
    with chpwd(path):
        save()
    ok_clean_git(ds.path, untracked=['untracked'])
Example 12
def test_save_directory(path):
    # Sequence of save invocations on subdirectories.
    ds = Dataset(path).create(force=True)
    ds.save(path='sdir1')
    ok_clean_git(ds.path, untracked=['sdir2/foo', 'sdir3/sdir/subsub/foo'])

    # Same again, but via the plain command with the dataset root as CWD
    with chpwd(path):
        save(path='sdir2')
    ok_clean_git(ds.path, untracked=['sdir3/sdir/subsub/foo'])

    with chpwd(opj(path, 'sdir3')):
        save(path='sdir')
    ok_clean_git(ds.path)
Example 13
def test_update_known_submodule(path):
    def get_baseline(p):
        ds = Dataset(p).create()
        sub = create(str(ds.pathobj / 'sub'))
        assert_repo_status(ds.path, untracked=['sub'])
        return ds
    # attempt one
    ds = get_baseline(op.join(path, 'wo_ref'))
    with chpwd(ds.path):
        save(recursive=True)
    assert_repo_status(ds.path)

    # attempt two, same as above but call save via the reference dataset
    ds = get_baseline(op.join(path, 'w_ref'))
    ds.save(recursive=True)
    assert_repo_status(ds.path)
Example 14
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
Example 15
def test_update_known_submodule(path):
    def get_baseline(p):
        ds = Dataset(p).create()
        sub = create(text_type(ds.pathobj / 'sub'))
        assert_repo_status(ds.path, untracked=['sub'])
        return ds
    # attempt one
    ds = get_baseline(op.join(path, 'wo_ref'))
    with chpwd(ds.path):
        save(recursive=True)
    assert_repo_status(ds.path)

    # attempt two, same as above but call save via the reference dataset
    ds = get_baseline(op.join(path, 'w_ref'))
    ds.save(recursive=True)
    assert_repo_status(ds.path)
Example 16
def test_bf2541(path):
    ds = create(path)
    subds = ds.create('sub')
    assert_repo_status(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        res = save(recursive=True)
    assert_repo_status(ds.path)
Example 17
def test_bf1886(path):
    parent = Dataset(path).create()
    parent.create('sub')
    assert_repo_status(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', op.join(parent.path, 'down'))
    parent.save('down')
    assert_repo_status(parent.path)
    # now symlink pointing up
    os.makedirs(op.join(parent.path, 'subdir', 'subsubdir'))
    os.symlink(op.join(op.pardir, 'sub'), op.join(parent.path, 'subdir', 'up'))
    parent.save(op.join('subdir', 'up'))
    # 'all' to avoid the empty dir being listed
    assert_repo_status(parent.path, untracked_mode='all')
    # now symlink pointing 2xup, as in #1886
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.save(op.join('subdir', 'subsubdir', 'upup'))
    assert_repo_status(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    create(op.join(parent.path, 'sub2'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub2'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.save(['sub2', op.join('subdir', 'subsubdir', 'upup2')])
    assert_repo_status(parent.path)
    # full replication of #1886: the same as above, but from within a subdir
    # of the symlink, with no reference dataset
    create(op.join(parent.path, 'sub3'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub3'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(op.join(parent.path, 'subdir', 'subsubdir')):
        save([op.join(parent.path, 'sub3'),
              op.join(parent.path, 'subdir', 'subsubdir', 'upup3')])
    assert_repo_status(parent.path)
Example 18
def test_gh2043p1(path):
    # this test documents the interim agreement on what should happen
    # in the case documented in gh-2043
    ds = Dataset(path).create(force=True)
    ds.add('1')
    ok_clean_git(ds.path, untracked=['2', '3'])
    ds.unlock('1')
    ok_clean_git(ds.path, index_modified=['1'], untracked=['2', '3'])
    # save('.') should recommit the unlocked file, and not touch anything else
    # this tests the second issue in #2043
    with chpwd(path):
        # only save modified bits by default
        save('.')  #  because the first arg is the dataset
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    skip_v6_or_later(method='pass')(ok_clean_git)(ds.path, untracked=['2', '3'])
    with chpwd(path):
        # but when a path is given, anything that matches this path
        # untracked or not is added/saved
        save(path='.')
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    skip_v6_or_later(method='pass')(ok_clean_git)(ds.path)
Example 19
def save_output_to_repo(config_obj):
    base_comparison_dir_path = get_base_comparison_dir_path(config_obj)

    update_msg = "Update data with test run on {d}".format(
        d=dt.datetime.today().strftime("%Y-%m-%d"))

    result = datalad.save(update_msg,
                          str(base_comparison_dir_path),
                          on_failure="stop")

    sample_test_output = get_test_data_path() / "sample_test_output"
    data_message = ("New sample output was saved to {sample_test_output} for "
                    "future comparisons. Consider publishing this new data to "
                    "the publicly accessible servers.")
    print(data_message.format(**locals()))
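The on_failure='stop' argument used above belongs to the generic result handling that all datalad.api commands accept; a sketch of the three documented modes (the dataset path is hypothetical):

# Sketch of datalad's generic on_failure modes; the /tmp path is hypothetical.
import datalad.api as datalad

datalad.save(dataset='/tmp/demo', on_failure='ignore')    # failures appear only in result records
datalad.save(dataset='/tmp/demo', on_failure='continue')  # process everything, then raise (default)
datalad.save(dataset='/tmp/demo', on_failure='stop')      # raise at the first failure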
Example 20
def test_recursive_save(path):
    ds = Dataset(path).create()
    # nothing to save
    assert_false(ds.save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.get_subdatasets(recursive=True, absolute=True, fulfilled=True),
        [subsubds.path, subds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_equal(ds.save(all_changes=True), [ds])

    # make the new file known to its dataset
    # with #1141 this would be
    #ds.add(newfile_name, save=False)
    subsubds.add(newfile_name, save=False)

    # but remains dirty because of the untracked file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_equal(ds.save(all_changes=True), [])
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_equal(ds.save(all_changes=True, recursive=True), [subsubds, subds, ds])
    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_false(ds.save())
    # no recursive
    assert_false(ds.save(all_changes=True))
    # an explicit target saves only the corresponding dataset
    assert_equal(save(files=[testfname]), [subsubds])
    # plain recursive without any files given will save the beast
    assert_equal(ds.save(recursive=True), [subds, ds])
    # there is nothing else to save
    assert_false(ds.save(all_changes=True, recursive=True))
    # one more time and check that all datasets in the hierarchy get updated
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_true(ds.save(all_changes=True, recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_not_equal(old, new)
    # now let's check saving "upwards"
    assert not subds.repo.dirty
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty

    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    subsubds.save(message="savingtestmessage", super_datasets=True,
                  all_changes=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])

    # check commits to have correct messages
    # there are no dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
Example 21
def test_save(path):

    ds = Dataset(path)

    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds.save(message="add a new file")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save(message="modified new_file.tst")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without ds and files given in the PWD
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save(message="love rapunzel")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save(message="love marsians")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(op.join(path, fn), "w") as f:
            f.write(fn)

    ds.save([op.join(path, f) for f in files])
    # superfluous call to save (all of it was saved already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save(message="set of new files"))
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(op.join(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.save()
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # ensure modified subds is committed
    ds.save()
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.create('someotherds')
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds', version_tag='new_sub')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    tags = ds.repo.get_tags()
    ok_(len(tags) == 1)
    eq_(tags[0], dict(hexsha=ds.repo.get_hexsha(), name='new_sub'))
    # fails when retagged, like git does
    res = ds.save(version_tag='new_sub', on_failure='ignore')
    assert_status('error', res)
    assert_result_count(
        res, 1,
        action='save', type='dataset', path=ds.path,
        message=('cannot tag this version: %s',
                 "fatal: tag 'new_sub' already exists"))
Example 22
def test_recursive_save(path):
    ds = Dataset(path).create()
    # nothing to save
    assert_status('notneeded', ds.save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.subdatasets(recursive=True, fulfilled=True, result_xfm='paths'),
        [subds.path, subsubds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_result_values_equal(
        ds.save(result_filter=is_ok_dataset),
        'path',
        [ds.path])

    # make the new file known to its dataset
    ds.add(newfile_name, save=False)

    # but remains dirty because of the uncommitted file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_status('notneeded', ds.save())
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subsubds.path, subds.path, ds.path])

    # at this point the entire tree is clean
    ok_clean_git(ds.path)
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    # now we save recursively, nothing should happen
    res = ds.save(recursive=True)
    # we do not get any report from a subdataset, because we detect at the
    # very top that the entire tree is clean
    assert_result_count(res, 1)
    assert_result_count(res, 1, status='notneeded', action='save', path=ds.path)
    # now we introduce new files all the way down
    create_tree(subsubds.path, {"mike1": 'mike1'})
    # because we cannot say from the top if there is anything to do down below,
    # we have to traverse and we will get reports for all datasets, but there is
    # nothing actually saved
    res = ds.save(recursive=True)
    assert_result_count(res, 3)
    assert_status('notneeded', res)
    subsubds_indexed = subsubds.repo.get_indexed_files()
    assert_not_in('mike1', subsubds_indexed)
    assert_equal(states, [d.repo.get_hexsha() for d in (ds, subds, subsubds)])
    unlink(opj(subsubds.path, 'mike1'))
    ok_clean_git(ds.path)

    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_status('notneeded', ds.save())
    # no recursive
    assert_status('notneeded', ds.save())
    # an explicit target saves only the corresponding dataset
    assert_result_values_equal(
        save(path=[testfname]),
        'path',
        [subsubds.path])
    # plain recursive without any files given will save the beast
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subds.path, ds.path])
    # there is nothing else to save
    assert_status('notneeded', ds.save(recursive=True))
    ok_clean_git(ds.path)
    # one more time and check that all datasets in the hierarchy are not
    # contaminated with untracked files
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_status('notneeded', ds.save(recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_equal(old, new)
    assert ds.repo.dirty
    unlink(opj(ds.path, testfname))
    ok_clean_git(ds.path)

    # now let's check saving "upwards"
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    old_states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    subsubds.save(message="savingtestmessage", super_datasets=True)
    # this save actually didn't save anything in subsub (or anywhere),
    # because there were only untracked bits pending
    for old, new in zip(old_states, [d.repo.get_hexsha()
                                     for d in (ds, subds, subsubds)]):
        assert_equal(old, new)
    # but now we are saving this untracked bit specifically
    subsubds.save(message="savingtestmessage", path=['testnew2'],
                  super_datasets=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])

    # check commits to have correct messages
    # there are no dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')

    # and if we try to save while being within that subsubds path
    subsubds.unlock('testnew2')
    create_tree(subsubds.path, {"testnew2": 'smth2'})

    # trying to replicate https://github.com/datalad/datalad/issues/1540
    subsubds.save(message="saving new changes", all_updated=True)  # no super
    with chpwd(subds.path):
        # no explicit dataset is provided, but a path is provided
        save(path=['subsub'], message='saving sub', super_datasets=True)
    # super should get it saved too
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'saving sub')
Example 23
def test_save(path):

    ds = Dataset(path)

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds.save("add a new file")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save("modified new_file.tst")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without ds and files given in the PWD
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save("love rapunzel")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save("love marsians")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(opj(path, fn), "w") as f:
            f.write(fn)

    ds.add([opj(path, f) for f in files])
    # superfluous call to save (add saved it already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save("set of new files"))
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(opj(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.add('.')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # Note/TODO: ok_clean_git is failing in direct mode, due to staged but
    # uncommitted .datalad (probably caused within create)
    ok_(ds.repo.dirty)
    # ensure modified subds is committed
    ds.save()
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.create('someotherds')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
Example 24
def add(
    template_id,
    osf_user,
    osf_password,
    osf_overwrite,
    gh_user,
    gh_token,
    path,
    nprocs,
):
    """Add a new template."""
    from .io import run_command
    from .utils import copy_template
    import shutil
    from datalad import api as dl

    gh_password = getenv("GITHUB_PASSWORD")
    if not gh_user or not gh_token:
        raise click.BadParameter("Insufficient secrets to log in to GitHub")

    path = Path(path or f"tpl-{template_id}").absolute()
    cwd = Path.cwd()

    if not path.exists():
        raise click.UsageError(f"<{path}> does not exist.")

    metadata = {}

    # Check metadata
    if (path / "template_description.json").exists():
        metadata = json.loads((path / "template_description.json").read_text())
    metadata["Identifier"] = template_id

    # Check license
    license_path = path / "LICENSE"
    if not license_path.exists():
        license_path = path / "LICENCE"
    if not license_path.exists():
        license_path = path / "COPYING"

    if not license_path.exists():
        license_prompt = click.prompt(
            text="""\
A LICENSE file MUST be distributed with the template. The TemplateFlow Manager can \
set a license (either CC0 or CC-BY) for you.""",
            type=click.Choice(("CC0", "CC-BY", "Custom (abort)")),
            default="Custom (abort)",
        )
        if license_prompt == "Custom (abort)":
            raise click.UsageError(
                "Cannot proceed without a valid license. Please write a LICENSE "
                "file before uploading.")

        license_path = Path(
            pkgr_fn("tfmanager", f"data/{license_prompt}.LICENSE"))
        metadata["License"] = license_prompt

    # Check RRID
    if not metadata.get("RRID"):
        rrid = click.prompt(
            text="Has a RRID (research resource ID) already been assigned?",
            type=str,
            default='') or None

        if rrid:
            metadata["RRID"] = rrid

    # Check short description
    if not metadata.get("Name", "").strip():
        short_desc = click.prompt(
            text="""\
The "Name" metadata is not found within the <template_description.json> file. \
Please provide a short description for this resource.""",
            type=str,
        )

        if not short_desc:
            raise click.UsageError(
                "Cannot proceed without a short description.")

        metadata["Name"] = short_desc

    # Check authors
    authors_prompt = [
        a.strip() for a in metadata.get("Authors", []) if a.strip()
    ]
    if not authors_prompt:
        authors_prompt = [
            n.strip() for n in click.prompt(
                text="""\
The "Authors" metadata is not found within the <template_description.json> file. \
Please provide a list of authors separated by semicolon (;) in <Lastname Initial(s)> format.""",
                type=str,
            ).split(";") if n
        ]
        if not authors_prompt:
            click.confirm("No authors were given, do you want to continue?",
                          abort=True)

    metadata["Authors"] = authors_prompt

    # Check references
    refs_prompt = [
        f"""\
{'https://doi.org/' if not a.strip().startswith('http') else ''}\
{a.replace("doi:", "").strip()}"""
        for a in metadata.get("ReferencesAndLinks", []) if a.strip()
    ]
    if not refs_prompt:
        refs_prompt = [
            n.replace('"', "").strip() for n in click.prompt(
                text="""\
The "ReferencesAndLinks" metadata is not found within the <template_description.json> file. \
Please provide a list of links and publications within double-quotes \
(for example, "doi:10.1101/2021.02.10.430678") and separated by spaces (< >).""",
                type=str,
            ).split(" ") if n
        ]
        if not refs_prompt:
            click.confirm("No authors were given, do you want to continue?",
                          abort=True)
    metadata["ReferencesAndLinks"] = refs_prompt

    with TemporaryDirectory() as tmpdir:
        repodir = Path(tmpdir) / "templateflow"

        # Clone root <user>/templateflow project - fork if necessary
        click.echo(f"Preparing Pull-Request (wd={tmpdir}).")
        clone = run_command(
            f"git clone https://github.com/{gh_user}/templateflow.git "
            "--branch tpl-intake --single-branch",
            cwd=tmpdir,
            capture_output=False,
        )
        if clone.returncode != 0:
            run_command(
                "hub clone templateflow/templateflow",
                cwd=tmpdir,
                capture_output=False,
                env={
                    "GITHUB_USER": gh_user,
                    "GITHUB_PASSWORD": gh_password
                },
            )
            run_command(
                "hub fork --remote-name origin",
                cwd=str(repodir),
                capture_output=False,
                env={
                    "GITHUB_USER": gh_user,
                    "GITHUB_PASSWORD": gh_password
                },
            )
        else:
            run_command(
                "git remote add upstream https://github.com/templateflow/templateflow.git",
                cwd=str(repodir),
                capture_output=False,
            )

        chdir(repodir)

        # Create datalad dataset
        dl.create(
            path=f"tpl-{template_id}",
            cfg_proc="text2git",
            initopts={"initial-branch": "main"},
            description=metadata["Name"],
        )

        # Populate template
        copy_template(
            path=path,
            dest=repodir / f"tpl-{template_id}",
        )
        # Copy license
        shutil.copy(license_path, repodir / f"tpl-{template_id}" / "LICENSE")
        # (Over)write template_description.json
        (repodir / f"tpl-{template_id}" /
         "template_description.json").write_text(json.dumps(metadata,
                                                            indent=2))
        # Init/update CHANGELOG
        changelog = repodir / f"tpl-{template_id}" / "CHANGES"
        changes = [
            f"""
## {datetime.date.today().ctime()} - TemplateFlow Manager Upload
Populated contents after NIfTI sanitizing by the TF Manager.

"""
        ]
        if changelog.exists():
            changes += [changelog.read_text()]
        changelog.write_text("\n".join(changes))

        # Init OSF sibling
        rrid_str = f" (RRID: {metadata['RRID']})" if metadata.get(
            "RRID") else ""
        dl.create_sibling_osf(
            title=f"TemplateFlow resource: <{template_id}>{rrid_str}",
            name="osf",
            dataset=f"./tpl-{template_id}",
            public=True,
            category="data",
            description=metadata["Name"],
            tags=["TemplateFlow dataset", template_id])
        # Init GH sibling
        dl.create_sibling_github(reponame=f"tpl-{template_id}",
                                 dataset=str(repodir / f"tpl-{template_id}"),
                                 github_login=gh_user,
                                 publish_depends="osf-storage",
                                 existing="replace",
                                 access_protocol="ssh")

        # Save added contents
        dl.save(dataset=str(repodir / f"tpl-{template_id}"),
                message="ADD: TemplateFlow Manager initialized contents")

        # Push to siblings
        dl.push(
            dataset=str(repodir / f"tpl-{template_id}"),
            to="github",
            jobs=cpu_count(),
        )

        # Back home
        chdir(cwd)

        run_command(
            "git fetch upstream tpl-intake",
            cwd=str(repodir),
            capture_output=False,
        )
        run_command(
            f"git checkout -b pr/tpl-{template_id} upstream/tpl-intake",
            cwd=str(repodir),
            capture_output=False,
        )
        (repodir / f"{path.name}.toml").write_text(
            toml.dumps({
                "github": {
                    "user": gh_user
                },
            }))
        run_command(
            f"git add {path.name}.toml",
            cwd=str(repodir),
            capture_output=False,
        )
        run_command(
            f"git commit -m 'add(tpl-{template_id}): create intake file'",
            cwd=str(repodir),
            capture_output=False,
        )
        run_command(
            f"git push -u origin pr/tpl-{template_id}",
            cwd=str(repodir),
            capture_output=False,
            env={
                "GITHUB_USER": gh_user,
                "GITHUB_TOKEN": gh_token
            },
        )

        (repodir.parent / "message.md").write_text(f"""\
ADD: ``tpl-{template_id}``

## {metadata.get('Name', '<missing Name>')}

Identifier: {metadata.get('Identifier', '<missing Identifier>')}
Datalad: https://github.com/{gh_user}/tpl-{template_id}

### Authors
{', '.join(metadata['Authors'])}.

### License
{metadata.get('License', metadata.get('Licence', '<missing License>'))}

### Cohorts
{' '.join(('The dataset contains', str(len(metadata.get('cohort', []))), 'cohorts.'))
 if metadata.get('cohort') else 'The dataset does not contain cohorts.'}

### References and links
{', '.join(metadata.get('ReferencesAndLinks', [])) or 'N/A'}

""")
        run_command(
            "hub pull-request -b templateflow:tpl-intake "
            f"-h {gh_user}:pr/tpl-{template_id} "
            f"-F {repodir.parent / 'message.md'}",
            cwd=str(repodir),
            capture_output=False,
            env={
                "GITHUB_USER": gh_user,
                "GITHUB_TOKEN": gh_token
            },
        )
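# A minimal sketch of the `run_command` helper invoked throughout the example
# above; its actual definition is not part of this excerpt, so this assumes it
# wraps subprocess.run and merges any extra variables into the environment:
import os
import subprocess

def run_command(cmd, cwd=None, capture_output=True, env=None):
    """Run `cmd` in a shell; raise CalledProcessError on nonzero exit."""
    merged_env = dict(os.environ, **(env or {}))
    return subprocess.run(
        cmd,
        shell=True,
        cwd=cwd,
        check=True,
        capture_output=capture_output,
        env=merged_env,
    )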
Example No. 39
0
def test_recursive_save(path):
    ds = Dataset(path).create()
    # nothing to save
    assert_status('notneeded', ds.save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.subdatasets(recursive=True, fulfilled=True, result_xfm='paths'),
        [subds.path, subsubds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_result_values_equal(
        ds.save(result_filter=is_ok_dataset),
        'path',
        [ds.path])

    # make the new file known to its dataset
    ds.add(newfile_name, save=False)

    # but remains dirty because of the uncommitted file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_status('notneeded', ds.save())
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subsubds.path, subds.path, ds.path])

    # at this point the entire tree is clean
    ok_clean_git(ds.path)
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    # now we save recursively, nothing should happen
    res = ds.save(recursive=True)
    # we do not get any report from a subdataset, because we detect at the
    # very top that the entire tree is clean
    assert_result_count(res, 1)
    assert_result_count(res, 1, status='notneeded', action='save', path=ds.path)
    # now we introduce new files all the way down
    create_tree(subsubds.path, {"mike1": 'mike1'})
    # because we cannot say from the top if there is anything to do down below,
    # we have to traverse and we will get reports for all datasets, but there is
    # nothing actually saved
    res = ds.save(recursive=True)
    assert_result_count(res, 3)
    assert_status('notneeded', res)
    subsubds_indexed = subsubds.repo.get_indexed_files()
    assert_not_in('mike1', subsubds_indexed)
    assert_equal(states, [d.repo.get_hexsha() for d in (ds, subds, subsubds)])
    unlink(opj(subsubds.path, 'mike1'))
    ok_clean_git(ds.path)

    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_status('notneeded', ds.save())
    # no recursive
    assert_status('notneeded', ds.save())
    # an explicit target saves only the corresponding dataset
    assert_result_values_equal(
        save(path=[testfname]),
        'path',
        [subsubds.path])
    # plain recursive without any files given will save the beast
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subds.path, ds.path])
    # there is nothing else to save
    assert_status('notneeded', ds.save(recursive=True))
    ok_clean_git(ds.path)
    # one more time and check that all datasets in the hierarchy are not
    # contaminated with untracked files
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_status('notneeded', ds.save(recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_equal(old, new)
    assert ds.repo.dirty
    unlink(opj(ds.path, testfname))
    ok_clean_git(ds.path)

    # now let's check saving "upwards"
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    old_states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    subsubds.save(message="savingtestmessage", super_datasets=True)
    # this save actually didn't save anything in subsub (or anywhere),
    # because there were only untracked bits pending
    for old, new in zip(old_states, [d.repo.get_hexsha()
                                     for d in (ds, subds, subsubds)]):
        assert_equal(old, new)
    # but now we are saving this untracked bit specifically
    subsubds.save(message="savingtestmessage", path=['testnew2'],
                  super_datasets=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])

    # check commits to have correct messages
    # there are no dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')

    # and if we try to save while being within that subsubds path
    subsubds.unlock('testnew2')
    create_tree(subsubds.path, {"testnew2": 'smth2'})

    # trying to replicate https://github.com/datalad/datalad/issues/1540
    subsubds.save(message="saving new changes", all_updated=True)  # no super
    with chpwd(subds.path):
        # no explicit dataset is provided, but a path is
        save(path=['subsub'], message='saving sub', super_datasets=True)
    # super should get it saved too
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'saving sub')
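# The test above exercises saving "upwards": a change committed deep in a
# subdataset is also recorded in all superdatasets when super_datasets=True.
# A minimal sketch of that pattern (paths are illustrative, not from the
# original test):
from datalad.api import Dataset

super_ds = Dataset('/tmp/demo-super').create()
sub = super_ds.create('sub')
(sub.pathobj / 'data.txt').write_text('payload')
sub.save(path=['data.txt'], message='add payload', super_datasets=True)
# both sub and super_ds now carry a commit with the message 'add payload'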
Example No. 40
0
def test_save(path):

    ds = Dataset(path)

    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds.save(message="add a new file")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save(message="modified new_file.tst")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without a dataset or files being given, when run in the PWD
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save(message="love rapunzel")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save(message="love marsians")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(op.join(path, fn), "w") as f:
            f.write(fn)

    ds.save([op.join(path, f) for f in files])
    # superfluous call to save (all was saved already); should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save(message="set of new files"))
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(op.join(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.save()
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # ensure modified subds is committed
    ds.save()
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.create('someotherds')
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds', version_tag='new_sub')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    tags = ds.repo.get_tags()
    ok_(len(tags) == 1)
    eq_(tags[0], dict(hexsha=ds.repo.get_hexsha(), name='new_sub'))
    # fails when retagged, like git does
    res = ds.save(version_tag='new_sub', on_failure='ignore')
    assert_status('error', res)
    assert_result_count(res,
                        1,
                        action='save',
                        type='dataset',
                        path=ds.path,
                        message=('cannot tag this version: %s',
                                 "fatal: tag 'new_sub' already exists"))
Example No. 41
0
def test_path_arg_call(path):
    ds = create(path)
    for testfile in (ds.pathobj / 'abs.txt', ds.pathobj / 'rel.txt'):
        testfile.write_text(u'123')
        save(dataset=ds.path, path=[testfile.name], to_git=True)
Example No. 42
0
def _makeds(path, levels, ds=None, max_leading_dirs=2):
    """Create a hierarchy of datasets

    Used recursively: the current invocation generates datasets for the
    first level and delegates sub-levels to recursive invocations.

    Parameters
    ----------
    path : str
      Path to the top directory under which dataset will be created.
      If relative -- relative to current directory
    levels : list of list
      List of specifications for :func:`random.randint` call per each level.
    ds : Dataset, optional
      Super-dataset which would contain the new dataset (thus its path would
      be a parent of `path`). Note that ds needs to be installed.
    max_leading_dirs : int, optional
      Up to how many leading directories within a dataset could lead to a
      sub-dataset

    Yields
    ------
    str
       Path to the generated dataset(s)

    """
    # we apparently can't import api functionality within api
    from datalad.api import save
    # To simplify managing all the file paths etc
    if not isabs(path):
        path = abspath(path)
    # make it a git (or annex??) repository... ok -- let's randomly pick one or the other ;)
    RepoClass = GitRepo if random.randint(0, 1) else AnnexRepo
    lgr.info("Generating repo of class %s under %s", RepoClass, path)
    repo = RepoClass(path, create=True)
    # let's create some dummy file and add it to the beast
    fn = opj(path, "file%d.dat" % random.randint(1, 1000))
    with open(fn, 'w') as f:
        f.write(fn)
    repo.add(fn, git=True)
    repo.commit(msg="Added %s" % fn)

    yield path

    if levels:
        # make a dataset for that one since we want to add sub datasets
        ds_ = Dataset(path)
        # Process the levels
        level, levels_ = levels[0], levels[1:]
        nrepos = random.randint(*level)  # how many subds to generate
        for irepo in range(nrepos):
            # we would like to have up to 2 leading dirs
            subds_path = opj(*(['d%i' % i
                                for i in range(random.randint(0, max_leading_dirs+1))]
                               + ['r%i' % irepo]))
            subds_fpath = opj(path, subds_path)
            # yield all under
            for d in _makeds(subds_fpath, levels_, ds=ds_):
                yield d

    if ds:
        assert ds.is_installed()
        out = save(
            path,
            dataset=ds,
        )
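# A hypothetical driver for _makeds: generate a two-level hierarchy with one
# or two datasets on the first level and up to three on the second (the
# randint ranges are illustrative):
for dspath in _makeds('/tmp/demo-hierarchy', levels=[(1, 2), (0, 3)]):
    print('created dataset at', dspath)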
def process_files(files):
    failed = []
    for line in fileinput.input(files):
        name, kind = parse_input(line)
        if kind == "namespace":
            try:
                repos = list(get_namespace_repos(name))
            except requests.HTTPError as exc:
                lgr.warning(
                    "Failed to list repositories for %s (status %s). Skipping",
                    name, exc.response.status_code)
                failed.append(name)
                continue
        else:
            repos = [name]

        target_architectures_re = re.compile(target_architectures)
        target_tags_re = re.compile(target_tags)
        for repo in repos:
            lgr.info("Working on %s", repo)
            try:
                registry = RepoRegistry(repo)
                #pprint(list(zip(sorted(_all_tags['latest'], key=lambda r: r['digest']), sorted(_all_tags['1.32.0'],
                # key=lambda r: r['digest']))))
                tag_images = dict(get_repo_tag_images(repo))

                # The 'latest' tag is special in docker: it is the default one,
                # and it typically points to some other release/version.
                # If we find that to be the case, we do not create a dedicated
                # "latest" image/datalad container -- we just add a container
                # entry pointing to that one. If there is no matching one, we
                # do get "latest".
                latest_matching_tag = None
                # NOTE: "master" is also often used to signal a moving target;
                # it might or might not correspond to a tagged release. I guess
                # we are just doomed to breed those.
                if target_tags_re.match('latest'):
                    matching_tags = []
                    for tag, images in tag_images.items():
                        if tag == 'latest' or not target_tags_re.match(tag):
                            lgr.debug("Skipping tag %(tag)s")
                            continue

                        if images == tag_images['latest']:
                            matching_tags.append(tag)
                    if len(matching_tags) >= 1:
                        if len(matching_tags) > 1:
                            lgr.info(
                                "Multiple tags images match latest, taking the first: %s",
                                ', '.join(matching_tags))
                        latest_matching_tag = matching_tags[0]
                        lgr.info("Taking %s as the one for 'latest'",
                                 latest_matching_tag)
                else:
                    # TODO: if there is no latest, we should at least establish the
                    # convenient one for each tag
                    pass
                for tag, images in tag_images.items():
                    if tag == 'latest' and latest_matching_tag:
                        continue  # skip since we will handle it
                    if not target_tags_re.match(tag):
                        lgr.debug("Skipping tag %(tag)s")
                        continue
                    multiarch = len({i['architecture'] for i in images}) > 1
                    for image in images:
                        architecture = image['architecture']
                        if not target_architectures_re.match(architecture):
                            lgr.debug("Skipping architecture %(architecture)s",
                                      image)
                            continue
                        manifest = registry.get_manifest(image['digest'])
                        digest = manifest["config"]["digest"]
                        # yoh: if I got it right, this is the actual image ID we see in `docker images`
                        assert digest.startswith("sha256:")
                        digest = digest[7:]
                        digest_short = digest[:12]  # use short version in name
                        last_pushed = image.get('last_pushed')
                        if last_pushed:
                            assert last_pushed.endswith('Z')
                            # take only date
                            last_pushed = last_pushed[:10].replace('-', '')
                            assert len(last_pushed) == 8
                        cleaner_repo = repo
                        # this is how it looks on hub.docker.com URL
                        if repo.startswith('library/'):
                            cleaner_repo = "_/" + cleaner_repo[len('library/'
                                                                   ):]
                        image_name = f"{cleaner_repo}/{tag}/"
                        if multiarch:
                            image_name += f"{architecture}-"
                        if last_pushed:
                            # apparently not present for all, e.g. not for
                            # repronim/neurodocker; may be None for images
                            # built on the hub?
                            image_name += f"{last_pushed}-"
                        image_name += f"{digest_short}"
                        dl_container_name = clean_container_name(
                            str(image_name))
                        image_path = Path("images") / image_name
                        url = f"dhub://{repo}:{tag}@{image['digest']}"
                        save_paths = []
                        if image_path.exists():
                            lgr.info("%s already exists, skipping adding",
                                     str(image_path))
                        else:
                            save_paths.append(
                                write_json(
                                    Path(str(image_path) + '.manifest.json'),
                                    manifest))
                            save_paths.append(
                                write_json(
                                    Path(str(image_path) + '.image.json'),
                                    image))
                            add_container(url, dl_container_name, image_path)
                            # TODO: either fix datalad-container for https://github.com/datalad/datalad-container/issues/98
                            # or here, since we have manifest, we can datalad download-url, and add-archive-content
                            # of the gzipped layers (but without untarring) - that should add datalad-archive
                            # urls to individual layers in the "saved" version
                            # TODO: make it in a single commit with add_container at least,
                            # or one commit for the whole repo sweep
                            save(
                                path=save_paths,
                                message=f"Added manifest and image records for {dl_container_name}",
                            )
                        # TODO: ensure .datalad/config to have additional useful fields:
                        #  architecture, os, and manually "updateurl" since not added for
                        #  dhub:// ATM
                        if tag == latest_matching_tag and architecture == default_architecture:
                            # TODO remove section if exists, copy this one
                            lgr.warning(
                                "Tracking of 'latest' is not yet implemented")
            except requests.HTTPError as exc:
                lgr.warning(
                    "Failed processing %s. Skipping\n  status %s for %s", repo,
                    exc.response.status_code, exc.response.url)
                failed.append(name)
                continue
    return failed
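# A hypothetical sketch of the `write_json` helper used by process_files
# above; its definition is not shown in this excerpt, so this assumes it
# serializes the object and returns the written path for the later save():
import json
from pathlib import Path

def write_json(path, obj):
    """Write `obj` as pretty-printed JSON and return the path for saving."""
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(obj, indent=2))
    return path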