Example #1
    def __init__(self, resource, submission_type, job_spec=None,
                 resurrection=False):
        external_versions.check("datalad", min_version="0.13")
        super(DataladOrchestrator, self).__init__(
            resource, submission_type, job_spec, resurrection=resurrection)

        from datalad.api import Dataset
        self.ds = Dataset(".")
        if not self.ds.id:
            raise OrchestratorError("orchestrator {} requires a local dataset"
                                    .format(self.name))

        if self._resurrection:
            self.head = self.job_spec.get("_head")
        else:
            if self.ds.repo.dirty:
                raise OrchestratorError("Local dataset {} is dirty. "
                                        "Save or discard uncommitted changes"
                                        .format(self.ds.path))
            self._configure_repo()
            self.head = self.ds.repo.get_hexsha()
            _datalad_check_container(self.ds, self.job_spec)
            _datalad_format_command(self.ds, self.job_spec)

        if isinstance(self.session, SSHSession) and resource.key_filename:
            # Make the identity file available to 'datalad sshrun' even
            # if it is not configured in .ssh/config. This is
            # particularly important for AWS keys.
            os.environ["DATALAD_SSH_IDENTITYFILE"] = resource.key_filename
            from datalad import cfg
            cfg.reload(force=True)
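
What this example exercises: DataLad maps environment variables of the form DATALAD_* onto configuration options, but the process-wide `cfg` singleton only picks up such a change after a forced reload. A minimal standalone sketch of the same pattern (the key path is a placeholder, and the mapping of DATALAD_SSH_IDENTITYFILE onto the 'datalad.ssh.identityfile' option is an assumption based on DataLad's usual variable-to-option naming):

import os
from datalad import cfg

# Placeholder path; in the orchestrator above it comes from resource.key_filename.
os.environ["DATALAD_SSH_IDENTITYFILE"] = "/path/to/key"
# Force the process-wide config to re-read its sources, including the environment.
cfg.reload(force=True)
# Assumed option name corresponding to the environment variable above.
print(cfg.get("datalad.ssh.identityfile"))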
Example #2
    def __call__(module=None,
                 verbose=False,
                 nocapture=False,
                 pdb=False,
                 stop=False):
        if not module:
            from pkg_resources import iter_entry_points
            module = ['datalad']
            module.extend(ep.module_name
                          for ep in iter_entry_points('datalad.tests'))
        module = ensure_list(module)
        lgr.info('Starting test run for module(s): %s', module)

        # Exception (traceback) logging is disabled by default. However, as of
        # now we do test logging output in (too) great detail. Therefore enable
        # it here, so `datalad-test` doesn't fail by default.
        # Can be removed whenever the tests don't require it.
        from datalad import cfg as dlcfg
        from datalad.tests.utils import patch
        try:
            with patch.dict('os.environ', {'DATALAD_LOG_EXC': '1'}):
                dlcfg.reload()
                for mod in module:
                    datalad.test(module=mod,
                                 verbose=verbose,
                                 nocapture=nocapture,
                                 pdb=pdb,
                                 stop=stop)
        finally:
            dlcfg.reload()
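
Here the reload is paired with patch.dict: DATALAD_LOG_EXC=1 is injected into the environment for the duration of the test run, cfg.reload() makes the configuration reflect it, and the second reload in `finally` drops the override once the environment is restored. A minimal sketch of that pattern, using unittest.mock.patch directly:

import os
from unittest.mock import patch
from datalad import cfg

try:
    with patch.dict(os.environ, {"DATALAD_LOG_EXC": "1"}):
        cfg.reload()   # configuration now reflects the patched environment
        ...            # run whatever needs exception logging enabled
finally:
    cfg.reload()       # environment is back to normal, so drop the override too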
Example #3
def test_plugin_config(path):
    # baseline behavior, empty datasets on create
    ds = create(dataset=opj(path, 'ds1'))
    eq_(sorted(os.listdir(ds.path)), ['.datalad', '.git', '.gitattributes'])
    # now we configure a plugin to run twice after `create`
    cfg.add('datalad.create.run-after',
            'add_readme filename=after1.txt',
            where='global')
    cfg.add('datalad.create.run-after',
            'add_readme filename=after2.txt',
            where='global')
    # force reload to pick up newly populated .gitconfig
    cfg.reload(force=True)
    assert_in('datalad.create.run-after', cfg)
    # and now we create a dataset and expect the two readme files
    # to be part of it
    ds = create(dataset=opj(path, 'ds'))
    ok_clean_git(ds.path)
    assert(exists(opj(ds.path, 'after1.txt')))
    assert(exists(opj(ds.path, 'after2.txt')))
    # cleanup
    cfg.unset(
        'datalad.create.run-after',
        where='global')
    assert_not_in('datalad.create.run-after', cfg)
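
In this test, cfg.add(..., where='global') writes the options into the global .gitconfig on disk, and cfg.reload(force=True) makes sure the process-wide configuration re-reads that freshly written file before `create` is called again. A minimal sketch of the add/reload/unset round trip; 'datalad.example.option' is a hypothetical name used only for illustration:

from datalad import cfg

# Hypothetical option name, not a real DataLad setting.
cfg.add('datalad.example.option', 'some-value', where='global')
cfg.reload(force=True)                 # re-read the freshly written global config
assert 'datalad.example.option' in cfg
cfg.unset('datalad.example.option', where='global')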
Example #4
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(ifile))

    from datalad import cfg
    try:
        with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as cml:
                manager = SSHManager()
                ssh = manager.get_connection('ssh://localhost')
                cmd_out, _ = ssh("echo blah")
                expected_socket = op.join(
                    str(manager.socket_dir),
                    get_connection_hash("localhost", identity_file=ifile,
                                        bundled=True))
                ok_(exists(expected_socket))
                manager.close()
                assert_in("-i", cml.out)
                assert_in(ifile, cml.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)
Example #5
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(ifile))

    from datalad import cfg
    try:
        with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as cml:
                manager = SSHManager()
                ssh = manager.get_connection('ssh://localhost')
                cmd_out, _ = ssh("echo blah")
                expected_socket = op.join(
                    text_type(manager.socket_dir),
                    get_connection_hash("localhost", identity_file=ifile,
                                        bundled=True))
                ok_(exists(expected_socket))
                manager.close()
                assert_in("-i", cml.out)
                assert_in(ifile, cml.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)
Example #6
def check_integration1(login,
                       keyring,
                       path,
                       organization=None,
                       kwargs={},
                       oauthtokens=None):
    kwargs = kwargs.copy()
    if organization:
        kwargs['github_organization'] = organization

    ds = Dataset(path).create()
    if oauthtokens:
        for oauthtoken in assure_list(oauthtokens):
            ds.config.add('hub.oauthtoken', oauthtoken, where='local')

    # so we do not pick up local repo configuration/token
    repo_name = 'test_integration1'
    with chpwd(path):
        # ATM all the github goodness does not care about "this dataset"
        # so force the "process wide" cfg to pick up the oauthtoken defined above
        cfg.reload(force=True)
        # everything works nicely, no conflicts etc
        res = ds.create_sibling_github(repo_name, **kwargs)

        if organization:
            url_fmt = 'https://{login}@github.com/{organization}/{repo_name}.git'
        else:
            url_fmt = 'https://github.com/{login}/{repo_name}.git'
        eq_(res, [(ds, url_fmt.format(**locals()), False)])

        # but if we rerun, it should kaboom since the dataset already has this sibling:
        with assert_raises(ValueError) as cme:
            ds.create_sibling_github(repo_name, **kwargs)
        assert_in("already has a configured sibling", str(cme.exception))

        # we can give it a new name, but it should still kaboom since the remote
        # repository already exists
        with assert_raises(ValueError) as cme:
            ds.create_sibling_github(repo_name, name="github2", **kwargs)
        assert_in("already exists on", str(cme.exception))
        # we should not leave the broken sibling behind
        assert_not_in('github2', ds.repo.get_remotes())

        # If we ask to reconfigure - should proceed normally
        ds.create_sibling_github(repo_name, existing='reconfigure', **kwargs)
    cfg.reload(force=True)
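
The OAuth token here is written into the dataset's local .git/config via ds.config.add(..., where='local'). The process-wide `cfg` instance is resolved relative to the current working directory, so the combination of chpwd(path) and cfg.reload(force=True) is what lets dataset-unaware code see the token, and the final reload after leaving the directory drops it again. A minimal sketch of that hand-off, with placeholder path and token values:

from datalad import cfg
from datalad.api import Dataset
from datalad.utils import chpwd

ds = Dataset('/tmp/example-ds').create()                      # placeholder location
ds.config.add('hub.oauthtoken', 'PLACEHOLDER-TOKEN', where='local')

with chpwd(ds.path):
    cfg.reload(force=True)             # process-wide cfg now also sees the local config
    print(cfg.get('hub.oauthtoken'))
cfg.reload(force=True)                 # outside the dataset again, drop it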
Example #7
def check_integration1(login, keyring,
                       path,
                       organization=None,
                       kwargs={},
                       oauthtokens=None):
    kwargs = kwargs.copy()
    if organization:
        kwargs['github_organization'] = organization

    ds = Dataset(path).create()
    if oauthtokens:
        for oauthtoken in assure_list(oauthtokens):
            ds.config.add('hub.oauthtoken', oauthtoken, where='local')

    # so we do not pick up local repo configuration/token
    repo_name = 'test_integration1'
    with chpwd(path):
        # ATM all the github goodness does not care about "this dataset"
        # so force the "process wide" cfg to pick up the oauthtoken defined above
        cfg.reload(force=True)
        # everything works nicely, no conflicts etc
        res = ds.create_sibling_github(repo_name, **kwargs)

        if organization:
            url_fmt = 'https://{login}@github.com/{organization}/{repo_name}.git'
        else:
            url_fmt = 'https://github.com/{login}/{repo_name}.git'
        eq_(res, [(ds, url_fmt.format(**locals()), False)])

        # but if we rerun, it should kaboom since the dataset already has this sibling:
        with assert_raises(ValueError) as cme:
            ds.create_sibling_github(repo_name, **kwargs)
        assert_in("already has a configured sibling", str(cme.exception))

        # we can give it a new name, but it should still kaboom since the remote
        # repository already exists
        with assert_raises(ValueError) as cme:
            ds.create_sibling_github(repo_name, name="github2", **kwargs)
        assert_in("already exists on", str(cme.exception))
        # we should not leave the broken sibling behind
        assert_not_in('github2', ds.repo.get_remotes())

        # If we ask to reconfigure - should proceed normally
        ds.create_sibling_github(repo_name, existing='reconfigure', **kwargs)
    cfg.reload(force=True)
Example #8
def test_CapturedException():

    try:
        raise Exception("BOOM")
    except Exception as e:
        captured_exc = CapturedException(e)

    assert_re_in(
        r"BOOM \[test_captured_exception.py:test_CapturedException:[0-9]+\]",
        captured_exc.format_oneline_tb())
    assert_re_in(
        r"^\[.*\]",
        captured_exc.format_oneline_tb(include_str=False))  # only traceback

    try:
        raise NotImplementedError
    except Exception as e:
        captured_exc = CapturedException(e)

    assert_re_in(
        r"NotImplementedError \[test_captured_exception.py:test_CapturedException:[0-9]+\]",
        captured_exc.format_oneline_tb())

    def f():
        def f2():
            raise Exception("my bad again")

        try:
            f2()
        except Exception as e:
            # exception chain
            raise RuntimeError("new message") from e

    try:
        f()
    except Exception as e:
        captured_exc = CapturedException(e)

    # explicit limits of one and two levels:
    estr1 = captured_exc.format_oneline_tb(limit=1)
    estr2 = captured_exc.format_oneline_tb(limit=2)
    # the default limit can be controlled via environment/config
    try:
        with patch.dict('os.environ', {'DATALAD_EXC_STR_TBLIMIT': '3'}):
            cfg.reload()
            estr3 = captured_exc.format_oneline_tb()
        with patch.dict('os.environ', {}, clear=True):
            cfg.reload()
            estr_ = captured_exc.format_oneline_tb()
    finally:
        cfg.reload()  # make sure we don't have a side effect on other tests

    estr_full = captured_exc.format_oneline_tb(10)

    assert_re_in(
        r"new message \[test_captured_exception.py:test_CapturedException:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr_full)
    assert_re_in(
        r"new message \[test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr3)
    assert_re_in(
        r"new message \[test_captured_exception.py:f:[0-9]+,test_captured_exception.py:f2:[0-9]+\]",
        estr2)
    assert_re_in(r"new message \[test_captured_exception.py:f2:[0-9]+\]",
                 estr1)
    # default: no limit:
    assert_equal(estr_, estr_full)

    # standard output
    full_display = captured_exc.format_standard().splitlines()

    assert_equal(full_display[0], "Traceback (most recent call last):")
    # frames in f and f2 for the first exception, two lines each
    # (one giving the location, one showing the source line):
    assert_true(full_display[1].lstrip().startswith("File"))
    assert_equal(full_display[2].strip(), "f2()")
    assert_true(full_display[3].lstrip().startswith("File"))
    assert_equal(full_display[4].strip(), "raise Exception(\"my bad again\")")
    assert_equal(full_display[5].strip(), "Exception: my bad again")
    assert_equal(
        full_display[7].strip(),
        "The above exception was the direct cause of the following exception:")
    assert_equal(full_display[9], "Traceback (most recent call last):")
    # ...
    assert_equal(full_display[-1].strip(), "RuntimeError: new message")

    # CapturedException.__repr__:
    assert_re_in(r".*test_captured_exception.py:f2:[0-9]+\]$",
                 captured_exc.__repr__())
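
The cfg.reload() calls in this test revolve around DATALAD_EXC_STR_TBLIMIT: the default traceback depth of format_oneline_tb() is taken from the configuration, so the test patches the environment, reloads, formats, and reloads once more in `finally` so later tests see the default again. A minimal sketch of driving that limit from the environment (the variable-to-limit mapping is taken from the test above):

import os
from unittest.mock import patch
from datalad import cfg
from datalad.support.exceptions import CapturedException

try:
    raise RuntimeError("boom")
except Exception as e:
    captured = CapturedException(e)

try:
    with patch.dict(os.environ, {'DATALAD_EXC_STR_TBLIMIT': '3'}):
        cfg.reload()                          # limit now read from the environment
        print(captured.format_oneline_tb())   # shows at most three frames
finally:
    cfg.reload()                              # restore defaults for the rest of the process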
Example #9
    def prepare_remote(self):
        """Prepare dataset sibling on remote.
        """
        if not self.ds.repo.get_active_branch():
            # publish() fails when HEAD is detached.
            raise OrchestratorError(
                "You must be on a branch to use the {} orchestrator".format(
                    self.name))
        if not self.session.exists(self.root_directory):
            self.session.mkdir(self.root_directory, parents=True)

        resource = self.resource
        session = self.session

        inputs = list(self.get_inputs())
        if isinstance(session, SSHSession):
            if resource.key_filename:
                dl_version = external_versions["datalad"]
                if dl_version < "0.11.3":
                    # Connecting will probably fail because `key_filename` is
                    # set, but we have no way to tell DataLad about it.
                    lgr.warning(
                        "DataLad version %s detected. "
                        "0.11.3 or greater is required to use an "
                        "identity file not specified in ~/.ssh/config",
                        dl_version)
                # Make the identity file available to 'datalad sshrun' even if
                # it is not configured in .ssh/config. This is particularly
                # important for AWS keys.
                os.environ["DATALAD_SSH_IDENTITYFILE"] = resource.key_filename
                from datalad import cfg
                cfg.reload(force=True)

            sshurl = _format_ssh_url(
                resource.user,
                # AWS resource does not have host attribute.
                getattr(resource, "host", None) or session.connection.host,
                getattr(resource, "port", None),
                self.working_directory)

            # TODO: Add one level deeper with reckless clone per job to deal
            # with concurrent jobs?
            if not session.exists(self.working_directory):
                remotes = self.ds.repo.get_remotes()
                if resource.name in remotes:
                    raise OrchestratorError(
                        "Remote '{}' unexpectedly exists. "
                        "Either delete remote or rename resource.".format(
                            resource.name))

                self.ds.create_sibling(sshurl,
                                       name=resource.name,
                                       recursive=True)
                since = None  # Avoid since="" for non-existing repo.
            else:
                remote_branch = "{}/{}".format(
                    resource.name, self.ds.repo.get_active_branch())
                if self.ds.repo.commit_exists(remote_branch):
                    since = ""
                else:
                    # If the remote branch doesn't exist yet, publish will fail
                    # with since="".
                    since = None

            from datalad.support.exceptions import IncompleteResultsError
            try:
                self.ds.publish(to=resource.name, since=since, recursive=True)
            except IncompleteResultsError:
                raise OrchestratorError(
                    "'datalad publish' failed. Try running "
                    "'datalad update -s {} --merge --recursive' first".format(
                        resource.name))

            self._fix_up_dataset()

            if inputs:
                lgr.info("Making inputs available")
                try:
                    # TODO: Whether we try this `get` should be configurable.
                    self._execute_in_wdir("datalad get {}".format(
                        # FIXME: This should use something like
                        # execute_command_batch.
                        " ".join(map(shlex_quote, inputs))))
                except OrchestratorError:
                    # Should use --since for existing repo, but it doesn't seem
                    # to sync wrt content.
                    self.ds.publish(to=resource.name,
                                    path=inputs,
                                    recursive=True)
        elif resource.type == "shell":
            import datalad.api as dl
            if not session.exists(self.working_directory):
                dl.install(self.working_directory, source=self.ds.path)

            self.session.execute_command("git push '{}' HEAD:{}-base".format(
                self.working_directory, self.job_refname))
            self._checkout_target()

            if inputs:
                installed_ds = dl.Dataset(self.working_directory)
                installed_ds.get(inputs)
        else:
            # TODO: Handle more types?
            raise OrchestratorError("Unsupported resource type {}".format(
                resource.type))

        if not session.exists(self.meta_directory):
            session.mkdir(self.meta_directory, parents=True)
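
Besides the identity-file/reload pattern already seen in example #1, this orchestrator chooses between since="" and since=None before publishing: since="" only works once the corresponding branch already exists on the sibling. A small helper sketching that branching, assuming `ds` is a datalad Dataset using the older publish() API shown above:

def publish_to_sibling(ds, remote_name):
    """Publish `ds` to `remote_name`, avoiding since="" when the remote
    branch does not exist yet (a sketch of the logic in prepare_remote)."""
    remote_branch = "{}/{}".format(remote_name, ds.repo.get_active_branch())
    since = "" if ds.repo.commit_exists(remote_branch) else None
    ds.publish(to=remote_name, since=since, recursive=True)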
Example #10
    def prepare_remote(self):
        """Prepare dataset sibling on remote.
        """
        repo = self.ds.repo
        if not repo.get_active_branch():
            # publish() fails when HEAD is detached.
            raise OrchestratorError(
                "You must be on a branch to use the {} orchestrator"
                .format(self.name))
        if not self.session.exists(self.root_directory):
            self.session.mkdir(self.root_directory, parents=True)

        resource = self.resource
        session = self.session

        inputs = list(self.get_inputs())
        if isinstance(session, (SSHSession, ShellSession)):
            if isinstance(session, SSHSession):
                if resource.key_filename:
                    # Make the identity file available to 'datalad sshrun' even
                    # if it is not configured in .ssh/config. This is
                    # particularly important for AWS keys.
                    os.environ["DATALAD_SSH_IDENTITYFILE"] = resource.key_filename
                    from datalad import cfg
                    cfg.reload(force=True)

                target_path = _format_ssh_url(
                    resource.user,
                    # AWS resource does not have host attribute.
                    getattr(resource, "host", None) or session.connection.host,
                    getattr(resource, "port", None),
                    self.working_directory)
            else:
                target_path = self.working_directory

            # TODO: Add one level deeper with reckless clone per job to deal
            # with concurrent jobs?
            target_exists = session.exists(self.working_directory)
            if not target_exists:
                since = None  # Avoid since="" for non-existing repo.
            else:
                remote_branch = "{}/{}".format(
                    resource.name,
                    repo.get_active_branch())
                if repo.commit_exists(remote_branch):
                    since = ""
                else:
                    # If the remote branch doesn't exist yet, publish will fail
                    # with since="".
                    since = None

            remotes = repo.get_remotes()
            if resource.name in remotes:
                if repo.get_remote_url(resource.name) != target_path:
                    raise OrchestratorError(
                        "Remote '{}' already exists with another URL. "
                        "Either delete remote or rename resource."
                        .format(resource.name))
                elif not target_exists:
                    lgr.debug(
                        "Remote '%s' matches resource name "
                        "and points to the expected target, "
                        "which doesn't exist.  "
                        "Removing remote and recreating",
                        resource.name)
                    repo.remove_remote(resource.name)

            self.ds.create_sibling(target_path, name=resource.name,
                                   recursive=True, existing="skip")

            call_check_dl_results(
                self.ds.publish, "'datalad publish' failed",
                to=resource.name, since=since,
                recursive=True, on_failure="ignore")

            self._fix_up_dataset()

            if inputs:
                lgr.info("Making inputs available")
                try:
                    # TODO: Whether we try this `get` should be configurable.
                    self._execute_in_wdir("datalad get {}".format(
                        # FIXME: This should use something like
                        # execute_command_batch.
                        " ".join(map(shlex_quote, inputs))))
                except OrchestratorError:
                    # Should use --since for existing repo, but it doesn't seem
                    # to sync wrt content.
                    self.ds.publish(to=resource.name, path=inputs,
                                    recursive=True)
        else:
            # TODO: Handle more types?
            raise OrchestratorError("Unsupported resource type {}"
                                    .format(resource.type))

        if not session.exists(self.meta_directory):
            session.mkdir(self.meta_directory, parents=True)