Example no. 1
def test_surprise_subds(path):
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)
    # save non-recursive
    ds.save(recursive=False)
    # the content of neither subds nor subrepo is added to its
    # respective parent, as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])
    # however, while the subdataset is added (and reported as modified
    # because its content is still untracked), the subrepo
    # cannot be added (it has no commit)
    # worse: its untracked file has been added to the superdataset
    # XXX the next conditional really says: if the subrepo is not on an
    # adjusted branch (which would have a commit); see datalad/3178
    if not on_windows:
        assert_repo_status(ds.path, modified=['d2/subds'])
        assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                  ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())
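
For contrast, a minimal sketch of the "proper subdataset" case the comments above refer to (hypothetical paths, POSIX layout assumed): a nested repository that already has a commit is registered as a subdataset on save, instead of having its files absorbed by the superdataset.

import os.path as op
from datalad.api import create

super_ds = create('/tmp/super')                    # hypothetical location
create(op.join(super_ds.path, 'd2', 'subds'))      # create() makes an initial commit
super_ds.save(recursive=False)
# a repo with a commit gets registered as a subdataset; its files stay out of the parent
assert 'd2/subds' in super_ds.subdatasets(result_xfm='relpaths')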
Example no. 2
def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
Example no. 3
def test_install_into_dataset(source, top_path):

    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is too:
    ok_clean_git(ds.path, annex=None)
    ds.save('addsub')
    # now it is:
    ok_clean_git(ds.path, annex=None)

    # but we could also save while installing, and there should be no side effect
    # of saving any other changes if we choose not to auto-save them
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.add('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])
Example no. 4
def test_invalid_args(path, otherpath, alienpath):
    # source == path
    assert_raises(ValueError, clone, 'Zoidberg', path='Zoidberg')
    assert_raises(ValueError, clone, 'ssh://mars/Zoidberg', path='ssh://mars/Zoidberg')

    # "invalid URL" is a valid filepath... and since no clone to remote
    # is possible - we can just assume that it is the (legit) file path
    # which is provided, not a URL.  So both below should fail as any
    # other clone from a non-existing source and not for the reason of
    # "invalid something".  Behavior is similar to how Git performs - can
    # clone into a URL-like path.

    # install to an "invalid URL" path
    res = clone('Zoidberg', path='ssh://mars:Zoidberg', on_failure='ignore')
    assert_status('error', res)

    # install to a "remote location" path
    res = clone('Zoidberg', path='ssh://mars/Zoidberg', on_failure='ignore')
    assert_status('error', res)

    # make fake dataset
    ds = create(path)
    assert_raises(IncompleteResultsError, ds.clone, '/higherup.', 'Zoidberg')
    # make real dataset, try to install outside
    ds_target = create(opj(otherpath, 'target'))
    assert_raises(ValueError, ds_target.clone, ds.path, path=ds.path)
    assert_status('error', ds_target.clone(ds.path, path=alienpath, on_failure='ignore'))
Example no. 5
def test_failed_install_multiple(top_path):
    ds = create(top_path)

    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])

    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'])

    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])
    ds.add(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=False)
    # those which succeeded should be saved now
    eq_(ds.get_subdatasets(), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(cme.exception.failed), {'///nonexisting', _path_(top_path, 'ds2')})

    # but if there was only a single installation requested -- it will be
    # InstallFailedError to stay consistent with single install behavior
    # TODO: unify at some point
    with assert_raises(InstallFailedError) as cme:
        ds.install('ds2')
    with assert_raises(InstallFailedError) as cme:
        ds.install('///nonexisting')
Example no. 6
def test_unlock_raises(path, path2, path3):

    # make sure we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    with swallow_logs(new_level=logging.WARNING) as cml:
        unlock(dataset=None, path=path2)
        assert_in("ignored paths that do not belong to any dataset: ['{0}'".format(path2),
                  cml.out)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    with swallow_logs(new_level=logging.WARNING) as cml:
        ds.unlock(path="notexistent.txt")
        assert_in("ignored non-existing paths", cml.out)

    chpwd(_cwd)
Example no. 7
def test_add_subdataset(path, other):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=opj(ds.path, 'other'))
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.add('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
Example no. 8
def test_unlock_raises(path, path2, path3):

    # make sure we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    res = unlock(dataset=None, path=path2, result_xfm=None,
                 on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path not associated with any dataset")
    eq_(res['path'], path2)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    res = ds.unlock(path="notexistent.txt", result_xfm=None,
                    on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    chpwd(_cwd)
Example no. 9
def make_studyforrest_mockup(path):
    """Generate a dataset structure mimicking aspects of studyforrest.org

    Under the given path there are two directories:

    public - to be published datasets
    private - never to be published datasets

    The 'public' directory itself is a superdataset, the 'private' directory
    is just a directory that contains standalone datasets in subdirectories.
    """
    public = create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.create('phase1',
                           description='old-style, no connection to RAW')
    structural = public.create('structural', description='anatomy')
    tnt = public.create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
    tnt.clone(source=structural.path, path=opj('src', 'structural'), reckless=True)
    aligned = public.create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
    # new acquisition
    labet = create(opj(path, 'private', 'labet'), description="raw data ET")
    phase2_dicoms = create(opj(path, 'private', 'p2dicoms'), description="raw data P2MRI")
    phase2 = public.create('phase2',
                           description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless=True)
    phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'), reckless=True)
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
    aligned.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
    # never to be published media files
    media = create(opj(path, 'private', 'media'), description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.create('annotations', description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'), reckless=True)
    # a few typical analysis datasets
    # (just doing 3, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.create('metaanalysis', description="analysis of analyses")
    for i in range(1, 3):
        ana = public.create('analysis{}'.format(i),
                            description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'), reckless=True)
        ana.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True)
        ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
        # link to metaanalysis
        metaanalysis.clone(source=ana.path, path=opj('src', 'ana{}'.format(i)),
                           reckless=True)
        # simulate change in an input (but not raw) dataset
        create_tree(
            aligned.path,
            {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.save()
    # finally aggregate data
    aggregate = public.create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True)
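
A minimal usage sketch for the helper above (assuming it is importable from datalad.tests.utils_testdatasets, as the next example's comment suggests; the temporary directory handling is illustrative only):

import os.path as op
import tempfile
from datalad.api import Dataset
from datalad.tests.utils_testdatasets import make_studyforrest_mockup

with tempfile.TemporaryDirectory() as tmp:
    make_studyforrest_mockup(tmp)
    public = Dataset(op.join(tmp, 'public'))
    # the umbrella dataset now lists phase1, structural, tnt, aligned, ... as subdatasets
    print(public.subdatasets(result_xfm='relpaths'))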
Example no. 10
 def time_make_studyforrest_mockup(self):
     path = self.path
     # Carries a copy of the
     # datalad.tests.utils_testdatasets.py:make_studyforrest_mockup
     # as of 0.12.0rc2-76-g6ba6d53b
     # A copy is made so we do not reflect in the benchmark results changes
     # to that helper's code.  This copy only tests on 2 not 3 analyses
     # subds
     public = create(opj(path, 'public'), description="umbrella dataset")
     # the following tries to capture the evolution of the project
     phase1 = public.create('phase1',
                            description='old-style, no connection to RAW')
     structural = public.create('structural', description='anatomy')
     tnt = public.create('tnt', description='image templates')
     tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
     tnt.clone(source=structural.path, path=opj('src', 'structural'), reckless=True)
     aligned = public.create('aligned', description='aligned image data')
     aligned.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
     aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
     # new acquisition
     labet = create(opj(path, 'private', 'labet'), description="raw data ET")
     phase2_dicoms = create(opj(path, 'private', 'p2dicoms'), description="raw data P2MRI")
     phase2 = public.create('phase2',
                            description='new-style, RAW connection')
     phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless=True)
     phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'), reckless=True)
     # add to derivatives
     tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
     aligned.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
     # never to be published media files
     media = create(opj(path, 'private', 'media'), description="raw data ET")
     # assuming all annotations are in one dataset (in reality this is also
     # a superdataset with about 10 subdatasets)
     annot = public.create('annotations', description='stimulus annotation')
     annot.clone(source=media.path, path=opj('src', 'media'), reckless=True)
     # a few typical analysis datasets
     # (just doing 2, actual status quo is just shy of 10)
     # and also the real goal -> meta analysis
     metaanalysis = public.create('metaanalysis', description="analysis of analyses")
     for i in range(1, 2):
         ana = public.create('analysis{}'.format(i),
                             description='analysis{}'.format(i))
         ana.clone(source=annot.path, path=opj('src', 'annot'), reckless=True)
         ana.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True)
         ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
         # link to metaanalysis
         metaanalysis.clone(source=ana.path, path=opj('src', 'ana{}'.format(i)),
                            reckless=True)
         # simulate change in an input (but not raw) dataset
         create_tree(
             aligned.path,
             {'modification{}.txt'.format(i): 'unique{}'.format(i)})
         aligned.add('.')
     # finally aggregate data
     aggregate = public.create('aggregate', description='aggregate data')
     aggregate.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True)
Example no. 11
def test_remove_recreation(path):

    # test that recreation is possible and doesn't conflict with in-memory
    # remnants of the old instances
    # see issue #1311

    ds = create(path)
    ds.remove()
    ds = create(path)
    ok_clean_git(ds.path)
    ok_(ds.is_installed())
Example no. 12
def test_create_curdir(path, path2):
    with chpwd(path, mkdir=True):
        create()
    ds = Dataset(path)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    with chpwd(path2, mkdir=True):
        create(no_annex=True)
    ds = Dataset(path2)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=False)
    ok_(op.exists(op.join(ds.path, '.noannex')))
Example no. 13
def test_create_withprocedure(path):
    # first without
    ds = create(path)
    assert(not op.lexists(op.join(ds.path, 'README.rst')))
    ds.remove()
    assert(not op.lexists(ds.path))
    # now for reals...
    ds = create(
        # needs to identify the dataset, otherwise post-proc
        # procedure doesn't know what to run on
        dataset=path,
        proc_post=[['cfg_metadatatypes', 'xmp', 'datacite']])
    assert_repo_status(path)
    ds.config.reload()
    eq_(ds.config['datalad.metadata.nativetype'], ('xmp', 'datacite'))
Example no. 14
def test_add_subdataset(path):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.get_subdatasets())
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.get_subdatasets())
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.get_subdatasets())
Example no. 15
def test_get_metadata(path1, path2):
    for p in (path1, path2):
        print('PATH')
        ds = create(p, force=True)
        ds.save()
        meta = MetadataExtractor(
                ds,
                _get_metadatarelevant_paths(ds, []))._get_dataset_metadata()
        assert_equal(
            dumps(meta, sort_keys=True, indent=2),
            """\
{
  "author": [
    "Last1, First1",
    "Last2, First2"
  ],
  "citation": [
    "10.1016/j.cub.2011.08.031"
  ],
  "description": "Some long description.",
  "formats": [
    "application/matlab",
    "NIFTY"
  ],
  "name": "CRCNS.org xxx-1",
  "sameas": "10.6080/K0QN64NG",
  "shortdescription": "Main title",
  "tag": [
    "Neuroscience",
    "fMRI"
  ],
  "version": "1.0"
}""")
Example no. 16
def test_saving_prior(topdir):
    # the problem is that we might be saving what is actually needed to be
    # "created"

    # we would like to place this structure into a hierarchy of two datasets
    # so we create first top one
    ds1 = create(topdir, force=True)
    # and everything is ok, stuff is not added BUT ds1 will be considered dirty
    assert_repo_status(ds1.path, untracked=['ds2'])
    # And then we would like to initiate a sub1 subdataset
    ds2 = create('ds2', dataset=ds1, force=True)
    # But what would happen is that file1.txt under ds2 gets committed first into
    # ds1, and then the whole procedure crashes because ds2/file1.txt
    # is committed -- ds2 is already known to git and it just pukes with the somewhat
    # confusing "'ds2' already exists in the index"
    assert_in('ds2', ds1.subdatasets(result_xfm='relpaths'))
Example no. 17
def test_get_subdatasets_types(path):
    from datalad.api import create
    ds = create(path)
    ds.create('1')
    ds.create('true')
    # no types casting should happen
    eq_(ds.subdatasets(result_xfm='relpaths'), ['1', 'true'])
Example no. 18
def test_get_containing_subdataset(path):

    ds = create(path, force=True)
    ds.add(path='test.txt')
    ds.save("Initial commit")
    subds = ds.create("sub")
    subsubds = subds.create("subsub")

    eq_(ds.get_containing_subdataset(opj("sub", "subsub", "some")).path, subsubds.path)
    # the top of a subdataset belongs to the subdataset
    eq_(ds.get_containing_subdataset(opj("sub", "subsub")).path, subsubds.path)
    eq_(get_dataset_root(opj(ds.path, "sub", "subsub")), subsubds.path)
    eq_(ds.get_containing_subdataset(opj("sub", "some")).path, subds.path)
    eq_(ds.get_containing_subdataset("sub").path, subds.path)
    eq_(ds.get_containing_subdataset("some").path, ds.path)
    # make sure the subds is found, even when it is not present, but still
    # known
    shutil.rmtree(subds.path)
    eq_(ds.get_containing_subdataset(opj("sub", "some")).path, subds.path)
    eq_(ds.get_containing_subdataset("sub").path, subds.path)
    # but now GitRepo disagrees...
    eq_(get_dataset_root(opj(ds.path, "sub")), ds.path)
    # and this stays, even if we give the mount point directory back
    os.makedirs(subds.path)
    eq_(get_dataset_root(opj(ds.path, "sub")), ds.path)

    outside_path = opj(os.pardir, "somewhere", "else")
    assert_raises(PathOutsideRepositoryError, ds.get_containing_subdataset,
                  outside_path)
    assert_raises(PathOutsideRepositoryError, ds.get_containing_subdataset,
                  opj(os.curdir, outside_path))
    assert_raises(PathOutsideRepositoryError, ds.get_containing_subdataset,
                  abspath(outside_path))
Example no. 19
def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # since we don't log decorations such as the log level while
            # swallowing, check both that no ERROR was logged and that
            # no exit code was reported
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))
Example no. 20
def test_get_flexible_source_candidates_for_submodule(t, t2):
    f = _get_flexible_source_candidates_for_submodule
    # for now without mocking -- let's just really build a dataset
    from datalad.api import create
    from datalad.api import install
    ds = create(t)
    clone = install(t2, source=t)

    # the first one can only know about itself, or an explicitly provided url
    sshurl = 'ssh://e.c'
    httpurl = 'http://e.c'
    sm_httpurls = [httpurl, httpurl + '/.git']
    eq_(f(ds, 'sub'), [])
    eq_(f(ds, 'sub', sshurl), [sshurl])
    eq_(f(ds, 'sub', httpurl), sm_httpurls)
    eq_(f(ds, 'sub', None), [])  # otherwise really we have no clue where to get it from

    # but if we work on dsclone then it should also add urls deduced from its
    # own location (the default remote for the current branch)
    eq_(f(clone, 'sub'), [t + '/sub'])
    eq_(f(clone, 'sub', sshurl), [t + '/sub', sshurl])
    eq_(f(clone, 'sub', httpurl), [t + '/sub'] + sm_httpurls)
    eq_(f(clone, 'sub'), [t + '/sub'])  # otherwise really we have no clue where to get it from
    # TODO: check that http:// urls for the dataset itself get resolved

    # TODO: many more!!
Example no. 21
def test_uninstall_without_super(path):
    # a parent dataset with a proper subdataset, and another dataset that
    # is just placed underneath the parent, but not an actual subdataset
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    nosub = create(opj(parent.path, 'nosub'))
    ok_clean_git(nosub.path)
    subreport = parent.subdatasets()
    assert_result_count(subreport, 1, path=sub.path)
    assert_result_count(subreport, 0, path=nosub.path)
    # it should be possible to uninstall the proper subdataset, even without
    # explicitly calling the uninstall methods of the parent -- things should
    # be figured out by datalad
    uninstall(sub.path)
    assert not sub.is_installed()
    # no present subdatasets anymore
    subreport = parent.subdatasets()
    assert_result_count(subreport, 1)
    assert_result_count(subreport, 1, path=sub.path, state='absent')
    assert_result_count(subreport, 0, path=nosub.path)
    # but we should fail on an attempt to uninstall the non-subdataset
    res = uninstall(nosub.path, on_failure='ignore')
    assert_result_count(
        res, 1, path=nosub.path, status='error',
        message="will not uninstall top-level dataset (consider `remove` command)")
Example no. 22
def test_here(path):
    # few smoke tests regarding the 'here' sibling
    ds = create(path)
    res = ds.siblings(
        'query',
        on_failure='ignore',
        result_renderer=None)
    assert_status('ok', res)
    assert_result_count(res, 1)
    assert_result_count(res, 1, name='here')
    here = res[0]
    eq_(ds.repo.uuid, here['annex-uuid'])
    assert_in('annex-description', here)
    assert_in('annex-bare', here)
    assert_in('available_local_disk_space', here)

    # set a description
    res = ds.siblings(
        'configure',
        name='here',
        description='very special',
        on_failure='ignore',
        result_renderer=None)
    assert_status('ok', res)
    assert_result_count(res, 1)
    assert_result_count(res, 1, name='here')
    here = res[0]
    eq_('very special', here['annex-description'])
Example no. 23
def test_get_flexible_source_candidates_for_submodule(t, t2):
    f = _get_flexible_source_candidates_for_submodule
    # for now without mocking -- let's just really build a dataset
    ds = create(t)
    clone = install(
        t2, source=t,
        result_xfm='datasets', return_type='item-or-list')

    # the first one can only know about itself, or an explicitly provided url
    sshurl = 'ssh://e.c'
    httpurl = 'http://e.c'
    # Expansion with '/.git' no longer done in this helper
    #sm_httpurls = [httpurl, httpurl + '/.git']
    sm_httpurls = [httpurl]
    eq_(f(ds, 'sub'), [])
    eq_(f(ds, 'sub', sshurl), [sshurl])
    eq_(f(ds, 'sub', httpurl), sm_httpurls)
    eq_(f(ds, 'sub', None), [])  # otherwise really we have no clue where to get it from

    # but if we work on dsclone then it should also add urls deduced from its
    # own location (the default remote for the current branch)
    eq_(f(clone, 'sub'), [t + '/sub'])
    eq_(f(clone, 'sub', sshurl), [t + '/sub', sshurl])
    eq_(f(clone, 'sub', httpurl), [t + '/sub'] + sm_httpurls)
    eq_(f(clone, 'sub'), [t + '/sub'])  # otherwise really we have no clue where to get it from
Example no. 24
def test_implicit_install(src, dst):

    origin_top = create(src)
    origin_sub = origin_top.create("sub")
    origin_subsub = origin_sub.create("subsub")
    with open(opj(origin_top.path, "file1.txt"), "w") as f:
        f.write("content1")
    origin_top.add("file1.txt")
    with open(opj(origin_sub.path, "file2.txt"), "w") as f:
        f.write("content2")
    origin_sub.add("file2.txt")
    with open(opj(origin_subsub.path, "file3.txt"), "w") as f:
        f.write("content3")
    origin_subsub.add("file3.txt")
    origin_top.save(recursive=True)

    # first, install toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())

    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # fail on obscure non-existing one
    assert_raises(IncompleteResultsError, ds.install, source='obscure')

    # install 3rd level and therefore implicitly the 2nd:
    result = ds.install(path=opj("sub", "subsub"))
    ok_(sub.is_installed())
    ok_(subsub.is_installed())
    # but by default implicit results are not reported
    eq_(result, subsub)

    # fail on obscure non-existing one in subds
    assert_raises(IncompleteResultsError, ds.install, source=opj('sub', 'obscure'))

    # clean up, the nasty way
    rmtree(dst, chmod_files=True)
    ok_(not exists(dst))

    # again first toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())
    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # now implicit but without an explicit dataset to install into
    # (deriving from CWD):
    with chpwd(dst):
        # don't ask for the file content to make return value comparison
        # simpler
        result = get(path=opj("sub", "subsub"), get_data=False, result_xfm='datasets')
        ok_(sub.is_installed())
        ok_(subsub.is_installed())
        eq_(result, [sub, subsub])
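
The implicit-installation idea exercised above, in isolation (a minimal sketch; the clone path is hypothetical): asking get() for a deeper subdataset also installs the intermediate one, without fetching any file content.

from datalad.api import Dataset

super_ds = Dataset('/tmp/clone_of_top')            # assumed to be an installed clone
super_ds.get('sub/subsub', get_data=False)         # installs 'sub' implicitly, no data transfer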
Example no. 25
def test_bf2541(path):
    ds = create(path)
    subds = ds.create('sub')
    ok_clean_git(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        res = add('.', recursive=True)
    ok_clean_git(ds.path)
Example no. 26
def test_failed_clone(dspath):
    ds = create(dspath)
    res = ds.clone("http://nonexistingreallyanything.datalad.org/bla", "sub",
                   on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   res)
Example no. 27
def test_bf2541(path):
    ds = create(path)
    subds = ds.create('sub')
    assert_repo_status(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        res = save(recursive=True)
    assert_repo_status(ds.path)
Example no. 28
def test_remove_nowhining(path):
    # when removing a dataset under a dataset (but not a subdataset)
    # we should not get a meaningless message that something was not right
    ds = create(path)
    # just install/clone inside of it
    subds_path = _path_(path, 'subds')
    install(subds_path, source=path)
    remove(subds_path)  # should remove just fine
Example no. 29
def test_invalid_args(path):
    assert_raises(IncompleteResultsError, install, 'Zoidberg', source='Zoidberg')
    # install to an invalid URL
    assert_raises(ValueError, install, 'ssh://mars:Zoidberg', source='Zoidberg')
    # install to a remote location
    assert_raises(ValueError, install, 'ssh://mars/Zoidberg', source='Zoidberg')
    # make fake dataset
    ds = create(path)
    assert_raises(IncompleteResultsError, install, '/higherup.', 'Zoidberg', dataset=ds)
Example no. 30
 def get_baseline(p):
     ds = Dataset(p).create()
     with chpwd(ds.path):
         subds = create('sub')
     ds.add('sub', save=False)
     create_tree(subds.path, {"staged": ""})
     subds.add("staged", save=False)
     # subdataset has staged changes.
     ok_clean_git(ds.path, index_modified=['sub'])
     return ds
Example no. 31
def test_install_consistent_state(src, dest, dest2, dest3):
    # if we install a dataset whose sub-dataset "went ahead" on its branch
    # while the super-dataset was not yet updated (e.g. we installed super before),
    # then it is desirable for the default installed branch to end up at the
    # position the previous location was pointing to.
    # This is a mere heuristic that might not hold in some
    # cases, but it works for the most simple and thus most commonly used ones
    ds1 = create(src)
    sub1 = ds1.create('sub1')

    def check_consistent_installation(ds):
        datasets = [ds] + list(
            map(
                Dataset,
                ds.subdatasets(
                    recursive=True, fulfilled=True, result_xfm='paths')))
        assert len(datasets) == 2  # in this test
        for ds in datasets:
            # all of them should be in master branch
            eq_(ds.repo.get_active_branch(), "master")
            # all of them should be clean, so sub should be installed in a "version"
            # as pointed by the super
            ok_(not ds.repo.dirty)

    dest_ds = install(dest, source=src)
    # now we progress sub1 by adding sub2
    subsub2 = sub1.create('sub2')

    # and progress subsub2 forward to stay really thorough
    put_file_under_git(subsub2.path, 'file.dat', content="data")
    subsub2.save("added a file")  # above function does not commit

    # just installing a submodule -- apparently different code/logic,
    # but the same story should hold - we should install the version pointed
    # to by the super, and stay all clean
    dest_sub1 = dest_ds.install('sub1')
    check_consistent_installation(dest_ds)

    # So now we have source super-dataset "dirty" with sub1 progressed forward
    # Our install should try to "retain" consistency of the installation
    # whenever possible.

    # install entire hierarchy without specifying dataset
    # no filter, we want full report
    dest2_ds = install(dest2, source=src, recursive=True, result_filter=None)
    check_consistent_installation(dest2_ds[0])  # [1] is the subdataset

    # install entire hierarchy by first installing top level ds
    # and then specifying sub-dataset
    dest3_ds = install(dest3, source=src, recursive=False)
    # and then install the sub-datasets recursively, using dest3_ds
    # as the base dataset
    dest3_ds.install('sub1', recursive=True)
    check_consistent_installation(dest3_ds)
Example no. 32
def test_clone_crcns(tdir, ds_path):
    with chpwd(tdir):
        res = clone('///', path="all-nonrecursive", on_failure='ignore')
        assert_status('ok', res)

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.clone("///crcns", result_xfm='datasets', return_type='item-or-list')
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.subdatasets(result_xfm='paths'))
Example no. 33
def test_bf1886(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'), opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = create(opj(parent.path, 'sub2'))
    os.symlink(
        opj(pardir, pardir, 'sub2'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the above, but run from within a subdir holding the symlink
    # and with no reference dataset
    sub3 = create(opj(parent.path, 'sub3'))
    os.symlink(
        opj(pardir, pardir, 'sub3'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        rev_save([opj(parent.path, 'sub3'),
                  opj(parent.path, 'subdir', 'subsubdir', 'upup3')])
    # in contrast to `add`, this only operates on a single top-level dataset;
    # although it is not specified, it gets discovered based on the PWD.
    # The logic behind that feels a bit shaky;
    # consult the discussion in https://github.com/datalad/datalad/issues/3230
    # if this comes up as an issue at some point
    ok_clean_git(parent.path)
Example no. 34
def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    res = ds.subdatasets()
    assert_result_count(res, 1)
    assert_result_count(
        res,
        1,
        # essentials
        path=op.join(ds.path, 'dir'),
        gitmodule_url='./dir',
        gitmodule_name='dir',
        # but also the branch, by default
        gitmodule_branch='master',
    )
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
Example no. 35
def test_failed_install_multiple(top_path):
    ds = create(top_path)

    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=None, untracked=['ds1/', 'ds3/'])

    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'],
                   on_failure='continue')

    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=None, untracked=['ds1/', 'ds3/'])
    ds.add(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=None)
    # those which succeeded should be saved now
    eq_(ds.subdatasets(result_xfm='relpaths'), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(r.get('source_url', r['path']) for r in cme.exception.failed),
        {'///nonexisting', _path_(top_path, 'ds2')})
Example no. 36
def test_crawl_autoaddtext(ind, topurl, outd):
    ds = create(outd, text_no_annex=True)
    with chpwd(outd):  # TODO -- dataset argument
        crawl_init(
            {'url': topurl, 'a_href_match_': '.*'}
            , save=True
            , template='simple_with_archives')
        crawl()
    ok_clean_git(outd)
    ok_file_under_git(outd, "anothertext", annexed=False)
    ok_file_under_git(outd, "d/textfile", annexed=False)
    ok_file_under_git(outd, "d/tooshort", annexed=True)
Example no. 37
def test_create_withcfg(path):
    ds = create(
        dataset=path,
        cfg_proc=['yoda'])
    assert_repo_status(path)
    assert (ds.pathobj / 'README.md').exists()

    # If we are creating a dataset within a reference dataset, we save _after_
    # the procedure runs.
    ds.create('subds', cfg_proc=['yoda'])
    assert_repo_status(path)
    assert (ds.pathobj / 'subds' / 'README.md').exists()
Example no. 38
def test_backup_archive(path, objtree, archivremote):
    """Similar to test_archive_layout(), but not focused on
    compatibility with the directory-type special remote. Instead,
    it tests building a second RIA remote from an existing one, e.g.
    for backup purposes.
    """
    ds = create(path)
    setup_archive_remote(ds.repo, objtree)
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # copy files into the RIA archive
    ds.repo.copy_to('.', 'archive')

    targetpath = Path(archivremote) / ds.id[:3] / ds.id[3:] / 'archives'
    targetpath.mkdir(parents=True)
    subprocess.run(
        ['7z', 'u', str(targetpath / 'archive.7z'), '.'],
        cwd=str(Path(objtree) / ds.id[:3] / ds.id[3:] / 'annex' / 'objects'),
    )
    initexternalremote(ds.repo,
                       '7z',
                       'ria',
                       config={'base-path': archivremote})
    # wipe out the initial RIA remote (just for testing if the upcoming
    # one can fully take over)
    shutil.rmtree(objtree)
    # fsck to make git-annex aware of the loss
    assert_status('error', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='archive', fast=True)
    ])
    # now only available "here"
    eq_(len(ds.repo.whereis('one.txt')), 1)

    # make the backup archive known
    initexternalremote(ds.repo,
                       'backup',
                       'ria',
                       config={'base-path': archivremote})
    # now fsck the new remote to get the new special remote indexed
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='backup', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # now we can drop all content locally, reobtain it, and survive an
    # fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])
Example no. 39
def test_gitannex_remoteio_url(path, objtree):
    ds = create(path)
    initexternalremote(
        ds.repo,
        'ria-remote',
        'ria',
        config={'url': "ria+ssh://datalad-test:{}".format(objtree)})
    ds.repo._run_annex_command(
        'testremote',
        annex_options=['ria-remote'],
        log_stdout=False,
    )
Example no. 40
def test_bids(dspath, records):
    # make fake UKB datarecord downloads
    make_datarecord_zips('12345', records)

    # init dataset
    ds = create(dspath)
    ds.ukb_init('12345', ['20227_2_0', '25747_2_0', '25748_2_0', '25748_3_0'],
                bids=True)
    # dummy key file, just needed to bypass the check in tests
    ds.config.add('datalad.ukbiobank.keyfile', 'dummy', where='local')
    bin_dir = make_ukbfetch(ds, records)

    # put fake ukbfetch in the path and run
    with patch.dict(
            'os.environ',
        {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True)

    bids_files = ds.repo.get_files('incoming-bids')
    master_files = ds.repo.get_files()
    for i in [
            'ses-2/func/sub-12345_ses-2_task-rest_bold.nii.gz',
            'ses-2/non-bids/fMRI/sub-12345_ses-2_task-hariri_eprime.txt',
            'ses-3/non-bids/fMRI/sub-12345_ses-3_task-hariri_eprime.txt'
    ]:
        assert_in(i, bids_files)
        assert_in(i, master_files)

    # run again, nothing bad happens
    with patch.dict(
            'os.environ',
        {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True, force=True)

    bids_files = ds.repo.get_files('incoming-bids')
    master_files = ds.repo.get_files()
    for i in [
            'ses-2/func/sub-12345_ses-2_task-rest_bold.nii.gz',
            'ses-2/non-bids/fMRI/sub-12345_ses-2_task-hariri_eprime.txt',
            'ses-3/non-bids/fMRI/sub-12345_ses-3_task-hariri_eprime.txt'
    ]:
        assert_in(i, bids_files)
        assert_in(i, master_files)

    # now re-init with a different record subset and rerun
    ds.ukb_init('12345', ['25747_2_0', '25748_2_0', '25748_3_0'],
                bids=True,
                force=True)
    with patch.dict(
            'os.environ',
        {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True, force=True)
Example no. 41
def _test_drop(path, drop_immediately):
    s3url = 's3://datalad-test0-nonversioned'
    providers = get_test_providers(s3url)  # to verify having s3 credentials
    # vcr tape is getting bound to the session object, so we need to
    # force re-establishing the session for the bucket.
    # TODO (in datalad): make a dedicated API for that, now too obscure
    _ = providers.get_status(s3url, allow_old_session=False)
    create(path)
    # unfortunately this doesn't work without force dropping since I guess vcr
    # stops and then gets queried again for the same tape while testing for
    # drop :-/
    with chpwd(path):
        crawl_init(
            template="simple_s3",
            args=dict(
                bucket="datalad-test0-nonversioned",
                drop=True,
                drop_force=True,  # so test goes faster
                drop_immediately=drop_immediately,
            ),
            save=True)
    if drop_immediately:
        # cannot figure out why, but taping that interaction results in a
        # git annex addurl error.  No time to figure it out,
        # so we just crawl without vcr for now. TODO: figure out WTF
        with chpwd(path):
            crawl()
    else:
        with externals_use_cassette(
                'test_simple_s3_test0_nonversioned_crawl_ext'
                + ('_immediately' if drop_immediately else '')), \
                chpwd(path):
            crawl()
    # test that all was dropped
    repo = AnnexRepo(path, create=False)
    files = glob(_path_(path, '*'))
    eq_(len(files), 8)
    for f in files:
        assert_false(repo.file_has_content(f))
Example no. 42
def test_bf1886(path):
    parent = Dataset(path).create()
    parent.create('sub')
    assert_repo_status(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', op.join(parent.path, 'down'))
    parent.save('down')
    assert_repo_status(parent.path)
    # now symlink pointing up
    os.makedirs(op.join(parent.path, 'subdir', 'subsubdir'))
    os.symlink(op.join(op.pardir, 'sub'), op.join(parent.path, 'subdir', 'up'))
    parent.save(op.join('subdir', 'up'))
    # 'all' to avoid the empty dir being listed
    assert_repo_status(parent.path, untracked_mode='all')
    # now symlink pointing 2xup, as in #1886
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.save(op.join('subdir', 'subsubdir', 'upup'))
    assert_repo_status(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    create(op.join(parent.path, 'sub2'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub2'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.save(['sub2', op.join('subdir', 'subsubdir', 'upup2')])
    assert_repo_status(parent.path)
    # full replication of #1886: the above, but run from within a subdir holding the symlink
    # and with no reference dataset
    create(op.join(parent.path, 'sub3'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub3'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(op.join(parent.path, 'subdir', 'subsubdir')):
        save([op.join(parent.path, 'sub3'),
              op.join(parent.path, 'subdir', 'subsubdir', 'upup3')])
    assert_repo_status(parent.path)
Example no. 43
def client(tmpdir):
    ds = create(tmpdir.strpath)
    res = webapp(
        #'example_metadata',
        dataset=ds.path,
        mode='dry-run',
        return_type='item-or-list',
    )
    app = res['app']

    client = app.test_client()

    yield client
Example no. 44
def test_arg_missing(path, path2):
    # test fix for gh-3553
    ds = create(path)
    assert_raises(
        InsufficientArgumentsError,
        ds.siblings,
        'add',
        url=path2,
    )
    assert_status(
        'ok',
        ds.siblings(
            'add', url=path2, name='somename'))
Example no. 45
def test_gitannex_remoteio(path, objtree):
    ds = create(path)
    initexternalremote(
        ds.repo, 'ria-remote', 'ria',
        config={
            'base-path': objtree,
            'ssh-host': 'datalad-test',
        })
    ds.repo._run_annex_command(
        'testremote',
        annex_options=['ria-remote'],
        log_stdout=False,
    )
Example no. 46
def test_create_fake_dates(path):
    ds = create(path, fake_dates=True)

    ok_(ds.config.getbool("datalad", "fake-dates"))
    ok_(ds.repo.fake_dates_enabled)

    # Another instance detects the fake date configuration.
    ok_(Dataset(path).repo.fake_dates_enabled)

    first_commit = ds.repo.get_revisions(options=["--reverse", "--all"])[0]

    eq_(ds.config.obtain("datalad.fake-dates-start") + 1,
        int(ds.repo.format_commit("%ct", first_commit)))
Example no. 47
def test_install_subds_with_space(opath, tpath):
    ds = create(opath)
    ds.create('sub ds')
    # works even now, boring
    # install(tpath, source=opath, recursive=True)
    if on_windows:
        # on windows we cannot simply prepend localhost: to a path
        # and get a working sshurl...
        install(tpath, source=opath, recursive=True)
    else:
        # do via ssh!
        install(tpath, source="localhost:" + opath, recursive=True)
    assert Dataset(opj(tpath, 'sub ds')).is_installed()
Example no. 48
def test_dataset_local_mode(path=None):
    ds = create(path)
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', ds.config)
    # from .datalad/config
    assert_in('datalad.dataset.id', ds.config)
    # from .git/config
    assert_in('annex.version', ds.config)
    # now check that dataset-local mode doesn't have the global piece
    cfg = ConfigManager(ds, source='branch-local')
    assert_not_in('user.name', cfg)
    assert_in('datalad.dataset.id', cfg)
    assert_in('annex.version', cfg)
Example no. 49
def test_cfg_override(path):
    with chpwd(path):
        # control
        out, err = Runner()('datalad wtf -s some', shell=True)
        assert_not_in('datalad.dummy: this', out)
        # ensure that this is not a dataset's cfg manager
        assert_not_in('datalad.dataset.id', out)
        # env var
        if on_windows:
            cmd_str = 'set DATALAD_DUMMY=this&& datalad wtf -s some'
        else:
            cmd_str = 'DATALAD_DUMMY=this datalad wtf -s some'
        out, err = Runner()(cmd_str, shell=True)
        assert_in('datalad.dummy: this', out)
        # cmdline arg
        out, err = Runner()('datalad -c datalad.dummy=this wtf -s some',
                            shell=True)
        assert_in('datalad.dummy: this', out)

        # now create a dataset in the path. the wtf plugin will switch to
        # using the dataset's config manager, which must inherit the overrides
        create(dataset=path)
        # control
        out, err = Runner()('datalad wtf -s some', shell=True)
        assert_not_in('datalad.dummy: this', out)
        # ensure that this is a dataset's cfg manager
        assert_in('datalad.dataset.id', out)
        # env var
        if on_windows:
            cmd_str = 'set DATALAD_DUMMY=this&& datalad wtf -s some'
        else:
            cmd_str = 'DATALAD_DUMMY=this datalad wtf -s some'
        out, err = Runner()(cmd_str, shell=True)
        assert_in('datalad.dummy: this', out)
        # cmdline arg
        out, err = Runner()('datalad -c datalad.dummy=this wtf -s some',
                            shell=True)
        assert_in('datalad.dummy: this', out)
Example no. 50
def test_save_partial_commit_shrinking_annex(path):
    # This is a variation on the test above. The main difference is that there
    # are other staged changes in addition to the unlocked file.
    ds = create(path, force=True)
    ds.save()
    assert_repo_status(ds.path)
    ds.unlock(path="foo")
    create_tree(ds.path, tree={"foo": "a", "staged": ""}, remove_existing=True)
    # Even without this staged change, a plain 'git commit -- foo' would fail
    # with git-annex's partial index error, but save (or more specifically
    # GitRepo.save_) drops the pathspec if there are no staged changes.
    ds.repo.add("staged", git=True)
    ds.save(path="foo")
    assert_repo_status(ds.path, added=["staged"])
Esempio n. 51
0
def test_create_fake_dates(path):
    ds = create(path, fake_dates=True)

    ok_(ds.config.getbool("datalad", "fake-dates"))
    ok_(ds.repo.fake_dates_enabled)

    # Another instance detects the fake date configuration.
    ok_(Dataset(path).repo.fake_dates_enabled)

    first_commit = ds.repo.repo.commit(
        ds.repo.repo.git.rev_list("--reverse", "--all").split()[0])

    eq_(ds.config.obtain("datalad.fake-dates-start") + 1,
        first_commit.committed_date)
Esempio n. 52
0
def test_clone_isnt_a_smartass(origin_path, path):
    origin = create(origin_path)
    cloned = clone(origin, path,
                   result_xfm='datasets', return_type='item-or-list')
    with chpwd(path):
        # now we are inside a dataset clone, and we make another one;
        # we do not want automatic subdatasetification without being given a dataset
        # explicitly
        clonedsub = clone(origin, 'testsub',
                          result_xfm='datasets', return_type='item-or-list')
    # correct destination
    assert clonedsub.path.startswith(path)
    # no subdataset relation
    eq_(cloned.subdatasets(), [])
Esempio n. 53
0
def test_drop(path):
    get_test_providers('s3://datalad-test0-nonversioned')  # to verify having s3 credentials
    create(path)
    # unfortunately this doesn't work without force dropping since I guess vcr
    # stops and then gets queried again for the same tape while testing for
    # drop :-/
    with externals_use_cassette('test_simple_s3_test0_nonversioned_crawl_ext'), \
         chpwd(path):
        crawl_init(template="simple_s3",
                   args=dict(
                       bucket="datalad-test0-nonversioned",
                       drop=True,
                       drop_force=True  # so test goes faster
                   ),
                   save=True
                   )
        crawl()
    # test that all was dropped
    repo = AnnexRepo(path, create=False)
    files = glob(_path_(path, '*'))
    eq_(len(files), 8)
    for f in files:
        assert_false(repo.file_has_content(f))
Esempio n. 54
0
def test_dataset_systemglobal_mode(path=None):
    ds = create(path)
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', ds.config)
    # from .datalad/config
    assert_in('datalad.dataset.id', ds.config)
    # from .git/config
    assert_in('annex.version', ds.config)
    with chpwd(path):
        # now check that no config from a random dataset at PWD is picked up
        # if no dataset instance was provided
        cfg = ConfigManager(dataset=None, source='any')
        assert_in('user.name', cfg)
        assert_not_in('datalad.dataset.id', cfg)
        assert_not_in('annex.version', cfg)
Esempio n. 55
0
def test_invalid_args(path):
    assert_raises(ValueError, install, 'Zoidberg', source='Zoidberg')
    # install to an invalid URL
    assert_raises(ValueError,
                  install,
                  'ssh://mars:Zoidberg',
                  source='Zoidberg')
    # install to a remote location
    assert_raises(ValueError,
                  install,
                  'ssh://mars/Zoidberg',
                  source='Zoidberg')
    # make fake dataset
    ds = create(path)
    assert_raises(ValueError, install, '/higherup.', 'Zoidberg', dataset=ds)
Esempio n. 56
0
def test_failed_install_multiple(top_path):
    ds = create(top_path)

    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])

    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'])

    ok_clean_git(ds.path, annex=False)
    # those which succeeded should be saved now
    eq_(ds.get_subdatasets(), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(cme.exception.failed), {'///nonexisting', _path_(top_path, 'ds2')})

    # but if there was only a single installation requested -- it will be
    # InstallFailedError to stay consistent with single install behavior
    # TODO: unify at some point
    with assert_raises(InstallFailedError) as cme:
        ds.install('ds2')
    with assert_raises(InstallFailedError) as cme:
        ds.install('///nonexisting')
Esempio n. 57
0
def test_crawl(tempd):
    if not _get_github_cred().is_known:
        raise SkipTest("no github credential")
    ds = create(tempd)
    with chpwd(tempd):
        crawl_init(template='gh',
                   save=True,
                   args={
                       'org': 'datalad-collection-1',
                       'include': 'kaggle'
                   })
        crawl()
    subdss = ds.subdatasets(fulfilled=True, result_xfm='datasets')
    assert all('kaggle' in d.path for d in subdss)
    assert_greater(len(subdss), 1)
    assert_false(ds.repo.dirty)
Esempio n. 58
0
def test_bf3733(path=None):
    ds = create(path)
    # calling siblings configure for an unknown sibling without a URL
    # doesn't work, but also doesn't crash
    assert_result_count(
        ds.siblings('configure',
                    name='imaginary',
                    publish_depends='doesntmatter',
                    url=None,
                    on_failure='ignore'),
        1,
        status='error',
        action="configure-sibling",
        name="imaginary",
        path=ds.path,
    )
Esempio n. 59
0
def test_no_annex(path):
    ds = create(path)
    ok_clean_git(ds.path)
    create_tree(ds.path,
                {'code': {
                    'inannex': 'content',
                    'notinannex': 'othercontent'
                }})
    # add two files, pre and post configuration
    ds.add(opj('code', 'inannex'))
    plugin(['no_annex', 'pattern=code/**'], dataset=ds)
    ds.add(opj('code', 'notinannex'))
    ok_clean_git(ds.path)
    # one is annex'ed, the other is not, despite no change in the add call;
    # importantly, .gitattributes is also not annexed
    eq_([opj('code', 'inannex')], ds.repo.get_annexed_files())
Esempio n. 60
0
def test_remove_subds(path):
    ds = create(path)
    ds.create('sub')
    ds.create(op.join('sub', 'subsub'))
    assert_repo_status(ds.path)
    assert_result_count(ds.subdatasets(), 1, path=op.join(ds.path, 'sub'))
    # all good at this point, subdataset known, dataset clean
    # now have some external force wipe out the subdatasets
    rmtree(op.join(ds.path, 'sub'))
    assert_result_count(ds.status(),
                        1,
                        path=op.join(ds.path, 'sub'),
                        state='deleted')
    # a single call to save() must fix up the mess
    assert_status('ok', ds.save())
    assert_repo_status(ds.path)