def test_ssh_copy(sourcedir, sourcefile1, sourcefile2):
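    # round-trip files over SSH: put/get against the 'datalad-test' server,
    # covering obscure filenames and recursive directory copies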
    port = get_ssh_port('datalad-test')
    remote_url = 'ssh://datalad-test:{}'.format(port)
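    # the manager hands out (and reuses) a connection object per remote URL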
    manager = SSHManager()
    ssh = manager.get_connection(remote_url)

    # write to obscurely named file in sourcedir
    obscure_file = opj(sourcedir, get_most_obscure_supported_name())
    with open(obscure_file, 'w') as f:
        f.write("three")

    # copy tempfile list to remote_url:sourcedir
    sourcefiles = [sourcefile1, sourcefile2, obscure_file]
    ssh.put(sourcefiles, opj(remote_url, sourcedir))
    # docs promise that connection is auto-opened in case of multiplex
    if _ssh_manager_is_multiplex:
        ok_(ssh.is_open())

    # recursive copy tempdir to remote_url:targetdir
    # (the space in the name is deliberate -- it exercises path quoting)
    targetdir = sourcedir + '.c opy'
    ssh.put(sourcedir,
            opj(remote_url, targetdir),
            recursive=True,
            preserve_attrs=True)

    # check if sourcedir copied to remote_url:targetdir
    ok_(isdir(targetdir))
    # check if scp preserved the source directory attributes
    # (preserve_attrs=True translates to `scp -p`); scp keeps only
    # whole-second mtimes, so e.g. a source mtime of 1.12s yields
    # a target mtime of 1.0s
    eq_(getmtime(targetdir), int(getmtime(sourcedir)) + 0.0)

    # check that the target files (and their content) exist in
    # remote_url:targetdir; this implies that both single-file and
    # recursive directory copying passed
    for targetfile, content in zip(sourcefiles, ["one", "two", "three"]):
        targetpath = opj(targetdir, targetfile)
        ok_(exists(targetpath))
        with open(targetpath, 'r') as fp:
            eq_(content, fp.read())

    # and now a quick smoke test for get
    # but simplify the most obscure filename slightly to not trip `scp` itself
    togetfile = Path(targetdir) / (
        get_most_obscure_supported_name().replace('`', '') + '2')
    togetfile.write_text('something')
    ssh.get(opj(remote_url, str(togetfile)), sourcedir)
    ok_((Path(sourcedir) / togetfile.name).exists())

    ssh.close()


def test_target_ssh_simple(origin, src_path, target_rootpath):
    port = get_ssh_port("datalad-test")
    # prepare src
    source = install(src_path,
                     source=origin,
                     result_xfm='datasets',
                     return_type='item-or-list')
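    # the local clone of `origin` is the dataset we will publish from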

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source,
                       name="local_target",
                       sshurl="ssh://*****:*****@with_testsui(responses=["yes"])
        def interactive_assert_create_sshwebserver():
            assert_create_sshwebserver(
                dataset=source,
                name="local_target",
                sshurl="ssh://datalad-test" + target_path,
                publish_by_default=DEFAULT_BRANCH,
                existing='replace',
                ui=have_webui(),
            )

        interactive_assert_create_sshwebserver()

        eq_("ssh://datalad-test" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            lclcfg = AnnexRepo(src_path).config
            eq_(lclcfg.get('remote.local_target.annex-ignore'), 'false')
            # valid uuid
            eq_(lclcfg.get('remote.local_target.annex-uuid').count('-'), 4)
            # should be added too, even if URL matches prior state
            eq_(lclcfg.get('remote.local_target.push'), DEFAULT_BRANCH)

        # again, this time passing the URLs explicitly. Since we are on
        # datalad-test, the local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://datalad-test",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://datalad-test" + target_path,
            ui=have_webui(),
        )
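        # target_url is recorded as the sibling's fetch URL and
        # target_pushurl as its push URL -- both verified below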

        @with_testsui(responses=['yes'])
        def interactive_assert_create_sshwebserver():
            assert_create_sshwebserver(existing='replace', **cpkwargs)

        interactive_assert_create_sshwebserver()

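        # the sibling's annex description should default to its path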
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://datalad-test" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        if have_webui():
            from datalad_deprecated.tests.test_create_sibling_webui \
                import assert_publish_with_ui
            assert_publish_with_ui(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This takes effect ONLY if a "compatible" datalad is
                # installed on the remote end. ATM we have no easy way to
                # guarantee that AFAIK (yoh), so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            # and ignore .git/logs content (gh-5298)
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs') \
                        or f.startswith('.git/logs/'):
                    digests.pop(f)
                    mtimes.pop(f)

        if not have_webui():
            # the rest of the test assumes that we have uploaded a UI
            return
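        # snapshot digests and mtimes so the 'reconfigure' run below can be
        # shown to touch files without changing any content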
        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect the files that are expected to be touched (mtime change)
        # without any change in content
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
        }
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that behavior has changed a bit with some recent git
        # versions and the index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        ok_(modified_files.issuperset(ok_modified_files))