Example #1
0
def test_nitrc_pipeline(outd):
    """Crawl one NITRC/XNAT subject and verify the pipeline yields one output.

    Parameters
    ----------
    outd : str
      Directory in which the dataset is created and the pipeline is run
    """
    get_test_providers('https://www.nitrc.org/ir/')  # skip test if no credentials
    from datalad.distribution.dataset import Dataset
    # create() is invoked for its side effect; the returned Dataset handle
    # was previously bound to an unused local -- dropped.
    Dataset(outd).create()
    with chpwd(outd):
        out = run_pipeline(
            pipeline(NITRC_IR, project='fcon_1000', subjects=['xnat_S00401']))
    eq_(len(out), 1)
Example #2
0
def test_ls_s3():
    """Smoke-test `ls` on the versioned S3 test bucket."""
    url = 's3://datalad-test0-versioned/'
    # just to skip if no credentials
    with swallow_outputs():
        get_test_providers(url)

    with swallow_outputs() as cmo:
        res = ls(url)
        assert_equal(res, None)  # not output ATM
        assert_in('Bucket info:', cmo.out)
Example #3
0
def test_ls_s3():
    """Verify `ls` on a versioned S3 bucket prints bucket info and returns None."""
    url = 's3://datalad-test0-versioned/'
    with swallow_outputs():
        get_test_providers(url)  # just to skip if no credentials

    with swallow_outputs() as cmo:
        assert_equal(None, ls(url))  # not output ATM
        assert_in('Bucket info:', cmo.out)
Example #4
0
def test_version_url_deleted():
    """Versioned URL is still produced for a key that existed and was removed."""
    # to verify having credentials to access openfmri via S3
    get_test_providers('s3://datalad-test0-versioned/', reload=True)
    # it existed and then was removed
    fpath = "1version-removed.txt"
    base = "http://datalad-test0-versioned.s3.amazonaws.com/%s" % fpath
    expected = base + "?versionId=eZ5Hgwo8azfBv3QT7aW9dmm2sbLUY.QP"
    eq_(get_versioned_url(base), expected)
Example #5
0
def test_mtime(path, url, tempfile):
    """Downloader should carry the remote file's mtime onto the local copy."""
    # give the source file a known, custom mtime
    file_to_download = opj(path, 'file.dat')
    os.utime(file_to_download, (time.time(), 1000))
    assert_equal(os.stat(file_to_download).st_mtime, 1000)

    file_url = url + '/' + 'file.dat'
    with swallow_outputs():
        get_test_providers().download(file_url, path=tempfile)
    assert_equal(os.stat(tempfile).st_mtime, 1000)
Example #6
0
def test_mtime(path, url, tempfile):
    """Downloaded file should inherit the mtime of its remote source."""
    fname = 'file.dat'
    # stamp the served file with a known mtime
    src = opj(path, fname)
    os.utime(src, (time.time(), 1000))
    assert_equal(os.stat(src).st_mtime, 1000)

    with swallow_outputs():
        get_test_providers().download("%s/%s" % (url, fname), path=tempfile)
    assert_equal(os.stat(tempfile).st_mtime, 1000)
Example #7
0
def test_get_versioned_url():
    """Exercise get_versioned_url() against the test S3 buckets.

    Covers: appending ``versionId`` to bare and parameterized URLs,
    idempotency when a version is already present, pass-through of non-S3
    URLs, behavior on a non-versioned bucket, ``return_all``/``verify``,
    and ``update=True`` replacing an older version tag.
    """
    get_test_providers(
        's3://openfmri/tarballs'
    )  # to verify having credentials to access openfmri via S3
    for url_pref in ('http://openfmri.s3.amazonaws.com',
                     'https://s3.amazonaws.com/openfmri'):
        # a versionId query parameter gets appended to a bare URL
        eq_(get_versioned_url(url_pref + "/tarballs/ds001_raw.tgz"),
            url_pref + "/tarballs/ds001_raw.tgz?versionId=null")

        # ... and joined with '&' when a query parameter is already present
        eq_(get_versioned_url(url_pref + "/tarballs/ds001_raw.tgz?param=1"),
            url_pref + "/tarballs/ds001_raw.tgz?param=1&versionId=null")

        # We don't duplicate the version if it already exists.
        eq_(
            get_versioned_url(url_pref +
                              "/tarballs/ds001_raw.tgz?versionId=null"),
            url_pref + "/tarballs/ds001_raw.tgz?versionId=null")

    # something is wrong there
    #print(get_versioned_url("http://openfmri.s3.amazonaws.com/ds001/demographics.txt"))

    eq_(get_versioned_url("someurl"),
        "someurl")  # should just return original one
    # ... unless versioning is explicitly demanded for a non-S3 URL
    assert_raises(RuntimeError,
                  get_versioned_url,
                  "someurl",
                  guarantee_versioned=True)

    # TODO: on a bucket without versioning
    url = "http://datalad-test0-nonversioned.s3.amazonaws.com/2versions-removed-recreated.txt"
    eq_(get_versioned_url(url), url)
    eq_(get_versioned_url(url, return_all=True), [url])

    # plain s3:// URLs are not supported
    assert_raises(NotImplementedError, get_versioned_url, "s3://buga")

    urls = get_versioned_url(
        "http://datalad-test0-versioned.s3.amazonaws.com/2versions-removed-recreated.txt",
        return_all=True,
        verify=True)
    eq_(len(set(urls)), len(urls))  # all unique
    for url in urls:
        # so we didn't grab other files along with the same prefix
        ok_startswith(
            url,
            'http://datalad-test0-versioned.s3.amazonaws.com/2versions-removed-recreated.txt?versionId='
        )

    # Update a versioned URL with a newer version tag.
    url_3ver = "http://datalad-test0-versioned.s3.amazonaws.com/3versions-allversioned.txt"
    url_3ver_input = url_3ver + "?versionId=b.qCuh7Sg58VIYj8TVHzbRS97EvejzEl"
    eq_(get_versioned_url(url_3ver_input), url_3ver_input)
    eq_(get_versioned_url(url_3ver_input, update=True),
        url_3ver + "?versionId=Kvuind11HZh._dCPaDAb0OY9dRrQoTMn")
Example #8
0
def test_ls_s3():
    """`ls` on the versioned test bucket: check entry count and type breakdown."""
    url = 's3://datalad-test0-versioned/'
    with swallow_outputs():
        # just to skip if no credentials
        get_test_providers(url)

    with swallow_outputs() as cmo:
        res = ls(url)
        assert_equal(len(res), 17)  # all the entries
        # generator expression is the idiomatic replacement for map+lambda
        counts = Counter(x.__class__.__name__ for x in res)
        assert_equal(counts, {'Key': 14, 'DeleteMarker': 3})
        assert_in('Bucket info:', cmo.out)
Example #9
0
def test_obscure_names(path):
    """Crawl a bucket whose keys contain shell-unfriendly characters."""
    bucket = "datalad-test2-obscurenames-versioned"
    get_test_providers('s3://' + bucket)  # to verify having s3 credentials
    create(path)
    with externals_use_cassette('test_simple_s3_test2_obscurenames_versioned_crawl_ext'), \
         chpwd(path):
        crawl_init(template="simple_s3", args=dict(bucket=bucket), save=True)
        crawl()
    # fun with unicode was postponed
    ok_clean_git(path, annex=True)
    obscure = ['f &$=@:+,?;', "f!-_.*'( )", 'f 1', 'f [1][2]']
    for fname in obscure:
        ok_file_under_git(path, fname, annexed=True)
Example #10
0
def test_ls_s3():
    """Exercise `ls` on the versioned test bucket and validate its contents."""
    url = 's3://datalad-test0-versioned/'
    with swallow_outputs():
        get_test_providers(url)  # just to skip if no credentials

    with swallow_outputs() as cmo:
        res = ls(url)
        assert_equal(len(res), 17)  # all the entries
        # tally entry classes via a generator expression instead of map+lambda
        counts = Counter(type(x).__name__ for x in res)
        assert_equal(counts, {'Key': 14, 'DeleteMarker': 3})
        assert_in('Bucket info:', cmo.out)
Example #11
0
def check_basic_scenario(url, d=None):
    """Create a dataset in *d*, download *url* into it, and verify annex state.

    Checks that the downloaded key is known to both the local annex and the
    datalad special remote, that it survives a drop, that `whereis` emits no
    hidden stderr noise, and that a bogus URL fails with a clean addurl error.

    Parameters
    ----------
    url : str
      URL to download into the new dataset
    d : str, optional
      Directory in which to create the dataset
    """
    ds = Dataset(d).create()
    annex = ds.repo

    # TODO skip if no boto or no credentials
    get_test_providers(url)  # so to skip if unknown creds

    # Let's try to add some file which we should have access to
    ds.download_url(url)
    ds.save()

    # git-annex got a fix where it stopped replacing - in the middle of the filename
    # Let's cater to the developers who might have some intermediate version and not
    # easy to compare -- we will just check that only one file there is an that it
    # matches what we expect when outside of the development versions range:
    filenames = glob.glob(op.join(d, '3versions[-_]allversioned.txt'))
    eq_(len(filenames), 1)
    filename = op.basename(filenames[0])
    if external_versions['cmd:annex'] < '8.20200501':
        assert_in('_', filename)
    # Date after the fix in 8.20200501-53-gcabbc91b1
    elif external_versions['cmd:annex'] >= '8.20200512':
        assert_in('-', filename)
    else:
        pass  # either of those is ok

    whereis1 = annex.whereis(filename, output='full')
    eq_(len(whereis1), 2)  # here and datalad
    annex.drop(filename)

    whereis2 = annex.whereis(filename, output='full')
    eq_(len(whereis2), 1)  # datalad

    # make sure that there are no "hidden" error messages, despite the
    # whereis command succeeding
    # https://github.com/datalad/datalad/issues/6453#issuecomment-1047533276
    from datalad.runner import StdOutErrCapture

    # we need to swallow logs since if DATALAD_LOG_LEVEL is set low, we
    # would get all the git-annex debug output in stderr
    with swallow_logs(new_level=logging.INFO) as cml:
        out = annex._call_annex(['whereis'], protocol=StdOutErrCapture)
        eq_(out['stderr'].strip(), '')

    # if we provide some bogus address which we can't access, we shouldn't pollute output
    with assert_raises(CommandError) as cme:
        annex.add_url_to_file('bogus', url + '_bogus')
    assert_in('addurl: 1 failed', cme.value.stderr)
Example #12
0
def _annex(path):
    """Return an Annexificator for *path* with a freshly reset S3 downloader."""
    annexer = Annexificator(path, special_remotes=[DATALAD_SPECIAL_REMOTE])

    url = 's3://datalad-test0-versioned'
    providers = get_test_providers(url)  # to skip if no credentials
    # reset the downloader so the same vcr tapes can be reused across tests
    providers.get_provider(url).get_downloader(url).reset()
    return annexer
Example #13
0
def check_download_external_url(url, failed_str, success_str, d, url_final=None):
    """Verify download/fetch/status/redirect behavior of the provider for *url*.

    Parameters
    ----------
    url : str
      URL to download
    failed_str : str or None
      Substring that must NOT appear in the downloaded content
    success_str : str or None
      Substring that must appear in the downloaded content
    d : str
      Directory to download into
    url_final : str, optional
      Expected target URL after redirections; defaults to *url*
    """
    fpath = opj(d, get_url_straight_filename(url))
    providers = get_test_providers(url)  # url for check of credentials
    provider = providers.get_provider(url)
    downloader = provider.get_downloader(url)

    # Download way
    with swallow_outputs():  # unused `as cmo` bindings removed
        downloaded_path = downloader.download(url, path=d)
    assert_equal(fpath, downloaded_path)
    with open(fpath) as f:
        content = f.read()
        if success_str is not None:
            assert_in(success_str, content)
        if failed_str is not None:
            assert_false(failed_str in content)

    # And if we specify size -- result must be a prefix of the full content
    for s in [1, 2]:
        with swallow_outputs():
            downloaded_path_ = downloader.download(url, path=d, size=s, overwrite=True)
        # should not be affected
        assert_equal(downloaded_path, downloaded_path_)
        with open(fpath) as f:
            content_ = f.read()
        assert_equal(len(content_), s)
        assert_equal(content_, content[:s])

    # Fetch way
    content = downloader.fetch(url)
    if success_str is not None:
        assert_in(success_str, content)
    if failed_str is not None:
        assert_false(failed_str in content)

    # And if we specify size
    for s in [1, 2]:
        with swallow_outputs():
            content_ = downloader.fetch(url, size=s)
        assert_equal(len(content_), s)
        assert_equal(content_, content[:s])

    # Verify status
    status = downloader.get_status(url)
    assert isinstance(status, FileStatus)
    if not url.startswith('ftp://'):
        # TODO introduce support for mtime into requests_ftp?
        assert status.mtime
    assert status.size

    # Verify possible redirections
    if url_final is None:
        url_final = url
    assert_equal(downloader.get_target_url(url), url_final)
Example #14
0
def test_drop(path):
    """Crawl the non-versioned S3 test bucket with drop enabled and verify
    that no file content remains in the annex afterwards."""
    get_test_providers('s3://datalad-test0-nonversioned')  # to verify having s3 credentials
    create(path)
    # unfortunately this doesn't work without force dropping since I guess vcr
    # stops and then gets queried again for the same tape while testing for
    # drop :-/
    with externals_use_cassette('test_simple_s3_test0_nonversioned_crawl_ext'), \
         chpwd(path):
        crawl_init(template="simple_s3",
                   args=dict(
                       bucket="datalad-test0-nonversioned",
                       drop=True,
                       drop_force=True  # so test goes faster
                   ),
                   save=True
                   )
        crawl()
    # test that all was dropped
    repo = AnnexRepo(path, create=False)
    files = glob(_path_(path, '*'))
    eq_(len(files), 8)  # all crawled files should be present ...
    for f in files:
        assert_false(repo.file_has_content(f))  # ... but without content
Example #15
0
def check_download_external_url(url, failed_str, success_str, d):
    """Check the provider's downloader for *url*: download, fetch, status.

    Parameters
    ----------
    url : str
      URL to download
    failed_str : str or None
      Substring that must NOT appear in the content
    success_str : str or None
      Substring that must appear in the content
    d : str
      Directory to download into
    """
    fpath = opj(d, get_url_straight_filename(url))
    providers = get_test_providers(url)  # url for check of credentials
    provider = providers.get_provider(url)
    downloader = provider.get_downloader(url)

    # Download way
    with swallow_outputs():  # unused `as cmo` bindings removed
        downloaded_path = downloader.download(url, path=d)
    assert_equal(fpath, downloaded_path)
    with open(fpath) as f:
        content = f.read()
        if success_str is not None:
            assert_in(success_str, content)
        if failed_str is not None:
            assert_false(failed_str in content)

    # And if we specify size -- result must be a prefix of the full content
    for s in [1, 2]:
        with swallow_outputs():
            downloaded_path_ = downloader.download(url,
                                                   path=d,
                                                   size=s,
                                                   overwrite=True)
        # should not be affected
        assert_equal(downloaded_path, downloaded_path_)
        with open(fpath) as f:
            content_ = f.read()
        assert_equal(len(content_), s)
        assert_equal(content_, content[:s])

    # Fetch way
    content = downloader.fetch(url)
    if success_str is not None:
        assert_in(success_str, content)
    if failed_str is not None:
        assert_false(failed_str in content)

    # And if we specify size
    for s in [1, 2]:
        with swallow_outputs():
            content_ = downloader.fetch(url, size=s)
        assert_equal(len(content_), s)
        assert_equal(content_, content[:s])

    # Verify status
    status = downloader.get_status(url)
    assert isinstance(status, FileStatus)
    assert status.mtime
    assert status.size
Example #16
0
def check_download_external_url(url, failed_str, success_str, d):
    """Download *url* via its provider and validate content, sizes and status.

    Parameters
    ----------
    url : str
      URL to download
    failed_str : str or None
      Substring that must NOT appear in the content
    success_str : str or None
      Substring that must appear in the content
    d : str
      Directory to download into
    """
    fpath = opj(d, get_url_straight_filename(url))
    providers = get_test_providers(url)  # url for check of credentials
    provider = providers.get_provider(url)
    downloader = provider.get_downloader(url)

    # Download way
    with swallow_outputs():  # unused `as cmo` bindings removed
        downloaded_path = downloader.download(url, path=d)
    assert_equal(fpath, downloaded_path)
    with open(fpath) as f:
        content = f.read()
        if success_str is not None:
            assert_in(success_str, content)
        if failed_str is not None:
            assert_false(failed_str in content)

    # And if we specify size -- result must be a prefix of the full content
    for s in [1, 2]:
        with swallow_outputs():
            downloaded_path_ = downloader.download(url, path=d, size=s, overwrite=True)
        # should not be affected
        assert_equal(downloaded_path, downloaded_path_)
        with open(fpath) as f:
            content_ = f.read()
        assert_equal(len(content_), s)
        assert_equal(content_, content[:s])

    # Fetch way
    content = downloader.fetch(url)
    if success_str is not None:
        assert_in(success_str, content)
    if failed_str is not None:
        assert_false(failed_str in content)

    # And if we specify size
    for s in [1, 2]:
        with swallow_outputs():
            content_ = downloader.fetch(url, size=s)
        assert_equal(len(content_), s)
        assert_equal(content_, content[:s])

    # Verify status
    status = downloader.get_status(url)
    assert isinstance(status, FileStatus)
    assert status.mtime
    assert status.size
Example #17
0
def _test_drop(path, drop_immediately):
    """Crawl the non-versioned test bucket with drop enabled; verify no content.

    Parameters
    ----------
    path : str
      Directory to crawl into
    drop_immediately : bool
      Passed through to the crawler config; also selects whether the crawl
      runs under a vcr cassette (it does not when True, see below)
    """
    s3url = 's3://datalad-test0-nonversioned'
    providers = get_test_providers(s3url)  # to verify having s3 credentials
    # vcr tape is getting bound to the session object, so we need to
    # force re-establishing the session for the bucket.
    # TODO (in datalad): make a dedicated API for that, now too obscure
    _ = providers.get_status(s3url, allow_old_session=False)
    create(path)
    # unfortunately this doesn't work without force dropping since I guess vcr
    # stops and then gets queried again for the same tape while testing for
    # drop :-/
    with chpwd(path):
        crawl_init(
            template="simple_s3",
            args=dict(
                bucket="datalad-test0-nonversioned",
                drop=True,
                drop_force=True,  # so test goes faster
                drop_immediately=drop_immediately,
            ),
            save=True)
    if drop_immediately:
        # cannot figure out but taping that interaction results in
        # git annex addurl  error.  No time to figure it out
        # so we just crawl without vcr for now. TODO: figure out WTF
        with chpwd(path):
            crawl()
    else:
        # NOTE: the previous `('_immediately' if drop_immediately else '')`
        # suffix was dead code -- this branch only runs with
        # drop_immediately=False, so the suffix was always empty.
        with externals_use_cassette(
                'test_simple_s3_test0_nonversioned_crawl_ext'), \
                chpwd(path):
            crawl()
    # test that all was dropped
    repo = AnnexRepo(path, create=False)
    files = glob(_path_(path, '*'))
    eq_(len(files), 8)
    for f in files:
        assert_false(repo.file_has_content(f))
Example #18
0
def check_download_external_url(url,
                                failed_str,
                                success_str,
                                d,
                                url_final=None):
    """Verify binary download/fetch/status/redirect behavior for *url*.

    Parameters
    ----------
    url : str
      URL to download
    failed_str : str or None
      Substring (encoded to bytes) that must NOT appear in the content
    success_str : str or None
      Substring (encoded to bytes) that must appear in the content
    d : str
      Directory to download into
    url_final : str, optional
      Expected target URL after redirections; defaults to *url*
    """
    fpath = opj(d, get_url_straight_filename(url))
    providers = get_test_providers(url)  # url for check of credentials
    provider = providers.get_provider(url)
    downloader = provider.get_downloader(url)

    # we will load/fetch binary blobs
    success_bytes, failed_bytes = None, None
    if success_str is not None:
        success_bytes = success_str.encode()
    if failed_str is not None:
        failed_bytes = failed_str.encode()

    # Download way
    with swallow_outputs():  # unused `as cmo` bindings removed
        downloaded_path = downloader.download(url, path=d)
    assert_equal(fpath, downloaded_path)
    content = read_file(fpath, decode=False)
    if success_bytes is not None:
        assert_in(success_bytes, content)
    # consistency: guard on failed_bytes like the fetch branch below
    # (equivalent, since failed_bytes is set iff failed_str is not None)
    if failed_bytes is not None:
        assert_false(failed_bytes in content)

    # And if we specify size -- result must be a prefix of the full content
    for s in [1, 2]:
        with swallow_outputs():
            downloaded_path_ = downloader.download(url,
                                                   path=d,
                                                   size=s,
                                                   overwrite=True)
        # should not be affected
        assert_equal(downloaded_path, downloaded_path_)
        content_ = read_file(fpath, decode=False)
        assert_equal(len(content_), s)
        assert_equal(content_, content[:s])

    # Fetch way
    content = downloader.fetch(url, decode=False)
    if success_bytes is not None:
        assert_in(success_bytes, content)
    if failed_bytes is not None:
        assert_false(failed_bytes in content)

    # And if we specify size
    for s in [1, 2]:
        with swallow_outputs():
            content_ = downloader.fetch(url, size=s, decode=False)
        assert_equal(len(content_), s)
        assert_equal(content_, content[:s])

    # Verify status
    status = downloader.get_status(url)
    assert isinstance(status, FileStatus)
    if not url.startswith('ftp://'):
        # TODO introduce support for mtime into requests_ftp?
        assert status.mtime
    assert status.size

    # Verify possible redirections
    if url_final is None:
        url_final = url
    assert_equal(downloader.get_target_url(url), url_final)