Example #1
def check_run_and_get_output(cmd):
    runner = Runner()
    try:
        output = runner.run(["datalad", "--help"])
    except CommandError as e:
        raise AssertionError("'datalad --help' failed to start normally. "
                             "Exited with %d and output %s" % (e.code, (e.stdout, e.stderr)))
Example #2
def check_runner_heavy_output(log_online):
    # TODO: again, no automatic detection of this getting stuck yet.

    runner = Runner()
    cmd = '%s -c "import datalad.tests.heavyoutput;"' % sys.executable
    with swallow_outputs() as cm:
        ret = runner.run(cmd,
                         log_stderr=False,
                         log_stdout=False,
                         expect_stderr=True)
        eq_(cm.err, cm.out)  # they are identical in that script
        eq_(cm.out[:10], "[0, 1, 2, ")
        eq_(cm.out[-15:], "997, 998, 999]\n")

    #do it again with capturing:
    ret = runner.run(cmd, log_stderr=True, log_stdout=True, expect_stderr=True)

    # and now original problematic command with a massive single line
    if not log_online:
        # We know it would get stuck in online mode
        cmd = '%s -c "import sys; x=str(list(range(1000))); [(sys.stdout.write(x), sys.stderr.write(x)) for i in xrange(100)];"' % sys.executable
        ret = runner.run(cmd,
                         log_stderr=True,
                         log_stdout=True,
                         expect_stderr=True)
Example #3
def test_runner_log_stdout():
    # TODO: no idea how to check correct logging via any kind of assertion yet.

    runner = Runner(dry=False)
    cmd_ = ['echo', 'stdout-Message should be logged']
    for cmd in [cmd_, ' '.join(cmd_)]:
        # should be identical runs, either as a string or as a list
        kw = {}
        # on Windows it can't find echo if run outside the shell
        if on_windows and isinstance(cmd, list):
            kw['shell'] = True
        with swallow_logs(logging.DEBUG) as cm:
            ret = runner.run(cmd, log_stdout=True, **kw)
            eq_(cm.lines[0], "Running: %s" % cmd)
            if not on_windows:
                # we can just count on sanity
                eq_(cm.lines[1], "stdout| stdout-Message should be logged")
            else:
                # echo outputs quoted lines for some reason, so relax check
                ok_("stdout-Message should be logged" in cm.lines[1])
        assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, runner.commands.__str__()))

    cmd = 'echo stdout-Message should not be logged'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stdout=False)
            eq_(cmo.out, "stdout-Message should not be logged\n")
            eq_(cml.out, "")
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, runner.commands.__str__()))
Example #4
def compress_files(files, archive, path=None, overwrite=True):
    """Compress `files` into an `archive` file

    Parameters
    ----------
    files : list of str
    archive : str
    path : str
      Alternative directory under which compressor will be invoked, to e.g.
      take into account relative paths of files and/or archive
    overwrite : bool
      Whether to allow overwriting the target archive file if one already exists
    """
    runner = Runner(cwd=path)
    apath = Path(archive)
    if apath.exists():
        if overwrite:
            apath.unlink()
        else:
            raise ValueError(
                'Target archive {} already exists and overwrite is forbidden'.
                format(apath))
    if len(apath.suffixes) > 1 and apath.suffixes[-2] == '.tar':
        cmd = '7z u .tar -so -- {} | 7z u -si -- {}'.format(
            ' '.join(quote_cmdlinearg(f) for f in files),
            quote_cmdlinearg(str(apath)),
        )
    else:
        cmd = ['7z', 'u', str(apath), '--'] + files
    runner.run(cmd)
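
A hedged usage sketch of the function above (file and archive names are hypothetical; assumes a working 7z installation): a double suffix ending in ".tar", e.g. ".tar.gz", takes the piped two-stage 7z branch, anything else the plain "7z u" call.

# hypothetical usage; paths are made up for illustration
compress_files(['a.dat', 'b.dat'], 'backup.tar.gz', path='/tmp/work')  # tar, then compress
compress_files(['a.dat'], 'backup.zip')  # single 7z call; overwrites by default
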
Example #5
def test_runner_log_stdout():
    # TODO: no idea how to check correct logging via any kind of assertion yet.

    runner = Runner(dry=False)
    cmd_ = ['echo', 'stdout-Message should be logged']
    for cmd in [cmd_, ' '.join(cmd_)]:
        # should be identical runs, either as a string or as a list
        kw = {}
        # on Windows it can't find echo if run outside the shell
        if on_windows and isinstance(cmd, list):
            kw['shell'] = True
        with swallow_logs(logging.DEBUG) as cm:
            ret = runner.run(cmd, log_stdout=True, **kw)
            eq_(cm.lines[0], "Running: %s" % cmd)
            if not on_windows:
                # we can just count on sanity
                eq_(cm.lines[1], "stdout| stdout-Message should be logged")
            else:
                # echo outputs quoted lines for some reason, so relax check
                ok_("stdout-Message should be logged" in cm.lines[1])
        assert_equal(
            runner.commands, [],
            "Run of: %s resulted in non-empty buffer: %s" %
            (cmd, runner.commands.__str__()))

    cmd = 'echo stdout-Message should not be logged'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stdout=False)
            eq_(cmo.out, "stdout-Message should not be logged\n")
            eq_(cml.out, "")
    assert_equal(
        runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" %
        (cmd, runner.commands.__str__()))
Example #6
def get_singularity_jobspec(cmd):
    """Extract the runscript of a singularity container used as an executable

    Parameters
    ----------
    cmd : list
      A command as an argument list.

    Returns
    -------
    None or str, None or list
      If no singularity is available, or the executable in the command is not
      a singularity image given by its path, None is returned. Otherwise the
      runscript of the container is returned as a string. The second value is
      None if the first is None, or a list of arguments to the runscript.
    """
    # get the path to the command's executable
    exec_path = cmd[0]

    runner = Runner()
    if not op.exists(exec_path):
        # probably a command from PATH
        return

    # this is a real file, not just a command on the path
    try:
        stdout, stderr = runner.run(
            ['singularity', '--version'],
            log_stdout=True,
            log_stderr=True,
            expect_stderr=True,
            expect_fail=True,
        )
        # TODO could be used to tailor handling to particular versions
    except CommandError as e:  # pragma: no cover
        # we do not have a singularity installation that we can handle
        # log debug, because there is no guarantee that the executable
        # actually was a singularity container
        lgr.debug('No suitable singularity version installed: %s', exc_str(e))
        return
    # we have singularity
    try:
        stdout, stderr = runner.run(
            # stringification only needed for pythons older than 3.6
            ['singularity', 'exec', exec_path, 'cat', '/singularity'],
            log_stdout=True,
            log_stderr=True,
            expect_stderr=True,
            expect_fail=True,
        )
        # TODO could be used to tailor handling to particular versions
    except CommandError as e:
        # we do not have a singularity installation that we can handle
        # log debug, because there is no guarantee that the executable
        # actually was a singularity container
        lgr.debug('%s is not a singularity image: %s', exec_path, exc_str(e))
        return
    # all but the container itself are the arguments
    return exec_path, cmd[1:]
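
A short usage sketch (the image path is hypothetical): a result is only returned when the command's executable is an existing file that singularity can execute; otherwise one of the bare returns above is taken.

# hypothetical call; './pipeline.simg' stands in for a singularity image on disk
spec = get_singularity_jobspec(['./pipeline.simg', '--input', 'data.csv'])
if spec is not None:
    image_path, args = spec  # ('./pipeline.simg', ['--input', 'data.csv'])
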
Example #7
def check_run_and_get_output(cmd):
    runner = Runner()
    try:
        output = runner.run(["datalad", "--help"])
    except CommandError as e:
        raise AssertionError("'datalad --help' failed to start normally. "
                             "Exited with %d and output %s" %
                             (e.code, (e.stdout, e.stderr)))
Example #8
def setup(self):
    self.runner = Runner()
    # older versions might not have it
    try:
        from datalad.cmd import GitRunner
        self.git_runner = GitRunner()
    except ImportError:
        pass
Example #9
def create_info_file(self):
    runner = Runner()
    git_version = runner.run("git --version")[0].split()[2]
    self.create_file('INFO.txt',
                     "git: %s\n"
                     "datalad: %s\n"
                     % (git_version, __version__),
                     annex=False)
Example #10
def test_cfg_passthrough(path):
    runner = Runner()
    _ = runner.run([
        'datalad', '-c', 'annex.tune.objecthash1=true', '-c',
        'annex.tune.objecthashlower=true', 'create', path
    ])
    ds = Dataset(path)
    eq_(ds.config.get('annex.tune.objecthash1', None), 'true')
    eq_(ds.config.get('annex.tune.objecthashlower', None), 'true')
Example #11
def test_no_stdin_swallow(fname):
    # will relay actual exit code on CommandError
    cmd = ['datalad', 'sshrun', 'localhost', 'cat']

    out, err = Runner().run(cmd, stdin=open(fname))
    assert_equal(out.rstrip(), '123magic')

    # test with -n switch now, which we could place even at the end
    out, err = Runner().run(cmd + ['-n'], stdin=open(fname))
    assert_equal(out, '')
Example #12
def test_cfg_passthrough(path):
    runner = Runner()
    _ = runner.run(
        ['datalad',
         '-c', 'annex.tune.objecthash1=true',
         '-c', 'annex.tune.objecthashlower=true',
         'create', path])
    ds = Dataset(path)
    eq_(ds.config.get('annex.tune.objecthash1', None), 'true')
    eq_(ds.config.get('annex.tune.objecthashlower', None), 'true')
Example #13
def check_run_and_get_output(cmd):
    runner = Runner()
    try:
        # suppress log output in case it was set to high values
        with patch.dict('os.environ', {'DATALAD_LOG_LEVEL': 'WARN'}):
            output = runner.run(["datalad", "--help"])
    except CommandError as e:
        raise AssertionError("'datalad --help' failed to start normally. "
                             "Exited with %d and output %s" % (e.code, (e.stdout, e.stderr)))
    return output
Example #14
def test_runner_failure(dir):

    runner = Runner()
    failing_cmd = ['git', 'annex', 'add', 'notexistent.dat']
    assert_raises(CommandError, runner.run, failing_cmd, cwd=dir)

    try:
        runner.run(failing_cmd, cwd=dir)
    except CommandError as e:
        assert_equal(1, e.code)
        assert_in('notexistent.dat not found', e.stderr)
Example #15
def check_run_and_get_output(cmd):
    runner = Runner()
    try:
        # suppress log output in case it was set to high values
        with patch.dict('os.environ', {'DATALAD_LOGLEVEL': 'WARN'}):
            output = runner.run(["datalad", "--help"])
    except CommandError as e:
        raise AssertionError("'datalad --help' failed to start normally. "
                             "Exited with %d and output %s" %
                             (e.code, (e.stdout, e.stderr)))
    return output
Example #16
def test_runner_failure(dir):

    runner = Runner()
    failing_cmd = ['git', 'annex', 'add', 'notexistent.dat']
    assert_raises(CommandError, runner.run, failing_cmd, cwd=dir)

    try:
        runner.run(failing_cmd, cwd=dir)
    except CommandError as e:
        assert_equal(1, e.code)
        assert_in('notexistent.dat not found', e.stderr)
Example #17
def test_runner_log_stderr():
    # TODO: no idea how to check correct logging via any kind of assertion yet.

    runner = Runner(dry=False)
    cmd = 'echo stderr-Message should be logged >&2'
    ret = runner.run(cmd, log_stderr=True, expect_stderr=True)
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, runner.commands.__str__()))

    cmd = 'echo stderr-Message should not be logged >&2'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stderr=False)
            eq_(cmo.err.rstrip(), "stderr-Message should not be logged")
            eq_(cml.out, "")
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, runner.commands.__str__()))
Example #18
def test_runner(tempfile):

    # test non-dry command call
    runner = Runner(dry=False)
    cmd = 'echo Testing real run > %s' % tempfile
    ret = runner.run(cmd)
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, runner.commands.__str__()))
    assert_true(os.path.exists(tempfile), "Run of: %s resulted with non-existing file %s" % (cmd, tempfile))

    # test non-dry python function call
    output = runner.call(os.path.join, 'foo', 'bar')
    assert_equal(os.path.join('foo', 'bar'), output,
                 "Call of: os.path.join, 'foo', 'bar' returned %s" % output)
    assert_equal(runner.commands.__str__().find('os.path.join'), -1,
                 "Call of: os.path.join, 'foo', 'bar' resulted in buffer: %s" % runner.commands.__str__())
Example #19
def test_runner_instance_callable_dry():

    cmd_ = ['echo', 'Testing __call__ with string']
    for cmd in [cmd_, ' '.join(cmd_)]:
        runner = Runner(dry=True)
        ret = runner(cmd)
        eq_(ret, ("DRY", "DRY"))  # (stdout, stderr) is returned; in dry mode it is ("DRY", "DRY")
        assert_equal(
            runner.commands.__str__(), ("[%r]" % cmd),
            "Dry run of Runner.__call__ didn't record command: %s.\n"
            "Buffer: %s" % (cmd, runner.commands.__str__()))

    ret = runner(os.path.join, 'foo', 'bar')
    eq_(ret, None)

    assert_greater(
        runner.commands.__str__().find('join'), -1,
        "Dry run of Runner.__call__ didn't record function join()."
        "Buffer: %s" % runner.commands.__str__())
    assert_greater(
        runner.commands.__str__().find('args='), -1,
        "Dry run of Runner.__call__ didn't record function join()."
        "Buffer: %s" % runner.commands.__str__())
Example #20
def test_duecredit(path):
    # Just to check that there are no obvious side-effects
    run = Runner(cwd=path).run
    cmd = [
        sys.executable, "-c",
        "from datalad.support.gitrepo import GitRepo; GitRepo(%r, create=True)"
        % path
    ]

    env = os.environ.copy()

    # Test with duecredit not enabled for sure
    env.pop('DUECREDIT_ENABLE', None)
    # Alternative workaround for what is to be fixed by
    # https://github.com/datalad/datalad/pull/3215
    # where the underlying datalad process might issue a warning since our temp
    # cwd does not match a possibly present PWD env variable
    env.pop('PWD', None)

    out, err = run(cmd, env=env, expect_stderr=True)
    outs = out + err  # Let's not depend on where duecredit decides to spit it out
    # All quiet
    eq_(outs, '')

    # and now enable DUECREDIT - output could come to stderr
    env['DUECREDIT_ENABLE'] = '1'
    out, err = run(cmd, env=env, expect_stderr=True)
    outs = out + err

    if external_versions['duecredit']:
        assert_in('Data management and distribution platform', outs)
    else:
        eq_(outs, '')
Example #21
def test_script_shims():
    runner = Runner()
    for script in [
        'datalad',
        'git-annex-remote-datalad-archives',
        'git-annex-remote-datalad']:
        if not on_windows:
            # those must be available for execution, and should not contain an easy-install shim
            which, _ = runner(['which', script])
            # test if there is no easy install shim in there
            with open(which.rstrip()) as f:
                content = f.read()
        else:
            from distutils.spawn import find_executable
            content = find_executable(script)
        assert_not_in('EASY', content) # NOTHING easy should be there
        assert_not_in('pkg_resources', content)

        # and let's check that it is our script
        out, err = runner([script, '--version'])
        version = (out + err).splitlines()[0].split(' ', 1)[1]
        # we can get git and non git .dev version... so for now
        # relax
        get_numeric_portion = lambda v: [x for x in v.split('.') if x.isdigit()]
        # extract numeric portion
        assert get_numeric_portion(version) # that my lambda is correctish
        assert_equal(get_numeric_portion(__version__),
                     get_numeric_portion(version))
Example #22
def test_our_metadataset_search(tdir):
    # smoke test for basic search operations on our super-megadataset
    # expensive operation but ok
    ds = install(path=tdir,
                 source=DATASETS_TOPURL,
                 result_xfm='datasets',
                 return_type='item-or-list')
    res_haxby = list(ds.search('haxby'))
    assert_greater(len(res_haxby), 10)
    # default search should be case insensitive
    # but somehow it is not fully -- we get 12 here
    #res_Haxby = list(ds.search('Haxby'))
    #eq_(len(res_haxby), len(res_Haxby))

    assert_result_count(ds.search('id:873a6eae-7ae6-11e6-a6c8-002590f97d84',
                                  mode='textblob'),
                        1,
                        type='dataset',
                        path=op.join(ds.path, 'crcns', 'pfc-2'))

    # there is a problem with argparse not decoding into utf8 in PY2
    from datalad.cmdline.tests.test_main import run_main
    # TODO: make it into an independent lean test
    from datalad.cmd import Runner
    out, err = Runner(cwd=ds.path)('datalad search Buzsáki')
    assert_in('crcns/pfc-2 ', out)  # has it in description
    # and then another aspect: this entry is among multiple authors; we need to
    # check if aggregating them into a searchable entity was done correctly
    assert_in('crcns/hc-1 ', out)
Example #23
def test_our_metadataset_search(tdir):
    # TODO re-enable when a dataset with new aggregated metadata is
    # available at some public location
    raise SkipTest
    # smoke test for basic search operations on our super-megadataset
    # expensive operation but ok
    #ds = install(
    #    path=tdir,
    #    # TODO re-enable test when /// metadata actually conforms to the new metadata
    #    #source="///",
    #    source="smaug:/mnt/btrfs/datasets-meta6-4/datalad/crawl",
    #    result_xfm='datasets', return_type='item-or-list')
    assert list(ds.search('haxby'))
    assert_result_count(ds.search('id:873a6eae-7ae6-11e6-a6c8-002590f97d84',
                                  mode='textblob'),
                        1,
                        type='dataset',
                        path=opj(ds.path, 'crcns', 'pfc-2'))

    # there is a problem with argparse not decoding into utf8 in PY2
    from datalad.cmdline.tests.test_main import run_main
    # TODO: make it into an independent lean test
    from datalad.cmd import Runner
    out, err = Runner(cwd=ds.path)('datalad search Buzsáki')
    assert_in('crcns/pfc-2 ', out)  # has it in description
    # and then another aspect: this entry is among multiple authors; we need to
    # check if aggregating them into a searchable entity was done correctly
    assert_in('crcns/hc-1 ', out)
Example #24
def test_runner_failure(dir_):
    runner = Runner()
    with assert_raises(CommandError) as cme:
        runner.run(
            py2cmd('import sys; sys.exit(53)')
        )
    eq_(53, cme.exception.code)
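
py2cmd is a datalad test helper that this page does not reproduce. A minimal sketch of what it plausibly does (an assumption, not the verbatim datalad implementation) is to wrap a Python one-liner into an argument list for the current interpreter:

import sys

def py2cmd(code):
    # assumption: build an argv list that runs `code` under the same interpreter
    return [sys.executable, '-c', code]
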
Example #25
    def populate(self):

        super(SubmoduleDataset, self).populate()
        # add submodules
        annex = BasicAnnexTestRepo()
        annex.create()
        from datalad.cmd import Runner
        runner = Runner()
        kw = dict(cwd=self.path, expect_stderr=True)
        runner.run(['git', 'submodule', 'add', annex.url, 'sub1'], **kw)
        runner.run(['git', 'submodule', 'add', annex.url, 'sub2'], **kw)
        runner.run(['git', 'commit', '-m', 'Added sub1 and sub2.'], **kw)
        runner.run(['git', 'submodule', 'update', '--init', '--recursive'], **kw)
        # init annex in subdatasets
        for s in ('sub1', 'sub2'):
            runner.run(['git', 'annex', 'init'],
                       cwd=opj(self.path, s), expect_stderr=True)
Example #26
def setup(self):
    self.runner = Runner()
    # older versions might not have it
    try:
        from datalad.cmd import GitRunner
        self.git_runner = GitRunner()
    except ImportError:
        pass
Example #27
def __init__(self, dataset=None, dataset_only=False):
    # store in a simple dict
    # no subclassing, because we want to be largely read-only, and implement
    # config writing separately
    self._store = {}
    self._dataset = dataset
    self._dataset_only = dataset_only
    # Since configs could contain sensitive information, to prevent
    # any "facilitated" leakage -- just disable logging of outputs for
    # this runner
    run_kwargs = dict(log_outputs=False)
    if dataset is not None:
        # make sure we run the git config calls in the dataset
        # to pick up the right config files
        run_kwargs['cwd'] = dataset.path
    self._runner = Runner(**run_kwargs)
    self.reload()
Example #28
def test_runner_dry(tempfile):

    runner = Runner(dry=True)

    # test dry command call
    cmd = 'echo Testing dry run > %s' % tempfile
    ret = runner.run(cmd)
    assert_equal(("DRY", "DRY"), ret, "Dry run of: %s resulted in output %s" % (cmd, ret))
    assert_greater(runner.commands.__str__().find('echo Testing dry run'), -1,
                 "Dry run of: %s resulted in buffer: %s" % (cmd, runner.commands.__str__()))
    assert_false(os.path.exists(tempfile))

    # test dry python function call
    output = runner.call(os.path.join, 'foo', 'bar')
    assert_is(None, output, "Drycall of: os.path.join, 'foo', 'bar' returned %s" % output)
    assert_greater(runner.commands.__str__().find('join'), -1,
                   "Drycall of: os.path.join, 'foo', 'bar' resulted in buffer: %s" % runner.commands.__str__())
Example #29
def _move2store(storepath, d):
    # make a bare clone of it into a local that matches the organization
    # of a ria dataset store
    store_loc = str(storepath / d.id[:3] / d.id[3:])
    d.repo.call_git(['clone', '--bare', d.path, store_loc])
    d.siblings('configure', name='store', url=str(store_loc),
               result_renderer='disabled')
    Runner(cwd=store_loc).run(['git', 'update-server-info'])
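
The trailing `git update-server-info` call matters here: it generates the info/refs listing in the bare clone, which is what allows the store to be fetched over "dumb" transports (plain file or HTTP access) that cannot run git's smart protocol.
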
Example #30
class runner(SuprocBenchmarks):
    """Some rudimentary tests to see if there is no major slowdowns from Runner
    """
    def setup(self):
        from datalad.cmd import Runner
        self.runner = Runner()
        # older versions might not have it
        try:
            from datalad.cmd import GitRunner
            self.git_runner = GitRunner()
        except ImportError:
            pass

    def time_echo(self):
        self.runner.run("echo")

    def time_echo_gitrunner(self):
        self.git_runner.run("echo")
Example #31
def test_runner_stdout_capture():
    runner = Runner()
    test_msg = "stdout-Message"
    res = runner.run(
        py2cmd('import sys; print(%r, file=sys.stdout)' % test_msg),
        protocol=StdOutErrCapture,
    )
    eq_(res['stdout'].rstrip(), test_msg)
    ok_(not res['stderr'])
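
Note the API difference across the examples on this page: older snippets unpack a (stdout, stderr) tuple from run(), while newer ones pass a protocol class and index into a result dict. A minimal side-by-side sketch (the two calls belong to different datalad generations, so they will not both run against the same version):

# older, tuple-style API
out, err = Runner().run(['echo', 'hi'])

# newer, protocol-based API
res = Runner().run(['echo', 'hi'], protocol=StdOutErrCapture)
print(res['stdout'], res['stderr'])
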
Example #32
def test_create(path):
    ds = Dataset(path)
    ds.create(
        description="funny",
        # custom git init option
        initopts=dict(shared='world'))
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git', 'annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'), ds.id)
Example #33
class RunnerSuite(SuprocBenchmarks):
    """Some rudimentary tests to see if there is no major slowdowns from Runner
    """

    def setup(self):
        from datalad.cmd import Runner
        self.runner = Runner()
        # older versions might not have it
        try:
            from datalad.cmd import GitRunner
            self.git_runner = GitRunner()
        except ImportError:
            pass

    def time_echo(self):
        self.runner.run("echo")

    def time_echo_gitrunner(self):
        self.git_runner.run("echo")
Example #34
def check_runner_heavy_output(log_online):
    # TODO: again, no automatic detection of this getting stuck yet.

    runner = Runner()
    cmd = '%s -c "import datalad.tests.heavyoutput;"' % sys.executable
    with swallow_outputs() as cm:
        ret = runner.run(cmd, log_stderr=False, log_stdout=False, expect_stderr=True)
        eq_(cm.err, cm.out) # they are identical in that script
        eq_(cm.out[:10], "[0, 1, 2, ")
        eq_(cm.out[-15:], "997, 998, 999]\n")

    #do it again with capturing:
    ret = runner.run(cmd, log_stderr=True, log_stdout=True, expect_stderr=True)

    # and now original problematic command with a massive single line
    if not log_online:
        # We know it would get stuck in online mode
        cmd = '%s -c "import sys; x=str(list(range(1000))); [(sys.stdout.write(x), sys.stderr.write(x)) for i in xrange(100)];"' % sys.executable
        ret = runner.run(cmd, log_stderr=True, log_stdout=True, expect_stderr=True)
Example #35
def test_runner_fix_PWD(path):
    env = os.environ.copy()
    env['PWD'] = orig_cwd = os.getcwd()
    runner = Runner(cwd=path, env=env)
    res = runner.run(
        py2cmd('import os; print(os.environ["PWD"])'),
        protocol=StdOutCapture,
    )
    eq_(res['stdout'].strip(), path)  # was fixed up to point to cwd's path
    eq_(env['PWD'], orig_cwd)  # no side-effect
Example #36
def test_runner_log_stderr():
    # TODO: no idea how to check correct logging via any kind of assertion yet.

    runner = Runner(dry=False)
    cmd = 'echo stderr-Message should be logged >&2'
    ret = runner.run(cmd, log_stderr=True, expect_stderr=True)
    assert_equal(
        runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" %
        (cmd, runner.commands.__str__()))

    cmd = 'echo stderr-Message should not be logged >&2'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stderr=False)
            eq_(cmo.err.rstrip(), "stderr-Message should not be logged")
            eq_(cml.out, "")
    assert_equal(
        runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" %
        (cmd, runner.commands.__str__()))
Example #37
def test_runner(tempfile):
    runner = Runner()
    content = 'Testing real run' if on_windows else 'Testing äöü東 real run'
    cmd = 'echo %s > %s' % (content, tempfile)
    res = runner.run(cmd)
    # no capture of any kind, by default
    ok_(not res['stdout'])
    ok_(not res['stderr'])
    ok_file_has_content(tempfile, content, strip=True)
    os.unlink(tempfile)
Example #38
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME, the HOME on the remote end would be
    # different even though it is localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out IncompleteResultsError exception atm... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below that depends on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test since in the above test it would not puke ATM but just
    # not even try to copy since it assumes that the file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    published3 = ds.publish(to='localhost',
                            transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)
Example #39
def test_runner_instance_callable_wet():

    runner = Runner()
    cmd = [sys.executable, "-c", "print('Testing')"]

    out = runner(cmd)
    eq_(out[0].rstrip(), ('Testing'))
    eq_(out[1], '')

    ret = runner(os.path.join, 'foo', 'bar')
    eq_(ret, os.path.join('foo', 'bar'))
Example #40
def test_runner_cwd_encoding(path):
    env = os.environ.copy()
    # Add PWD to env so that runner will temporarily adjust it to point to cwd.
    env['PWD'] = os.getcwd()
    cwd = Path(path) / OBSCURE_FILENAME
    cwd.mkdir()
    # Running doesn't fail if cwd or env has unicode value.
    Runner(cwd=cwd, env=env).run(
        py2cmd(
            'from pathlib import Path; (Path.cwd() / "foo").write_text("t")'))
    ok_((cwd / 'foo').exists())
Example #41
def test_create(path):
    ds = Dataset(path)
    ds.create(
        description="funny",
        # custom git init option
        initopts=dict(shared='world'))
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git', 'annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'),
        ds.id)
Example #42
def test_create(path):
    ds = Dataset(path)
    ds.create(description="funny",
              native_metadata_type=['bim', 'bam', 'bum'],
              shared_access='world')
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git-annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'), ds.id)
    assert_equal(ds.config.get_value('datalad.metadata', 'nativetype'),
                 ('bim', 'bam', 'bum'))
Example #43
File: run.py Project: hanke/datalad
def _execute_command(command, pwd, expected_exit=None):
    from datalad.cmd import Runner

    exc = None
    cmd_exitcode = None
    runner = Runner(cwd=pwd)
    try:
        lgr.info("== Command start (output follows) =====")
        runner.run(
            command,
            # immediate output
            log_online=True,
            # not yet sure what we should do with the command output
            # IMHO `run` itself should be very silent and let the command talk
            log_stdout=False,
            log_stderr=False,
            expect_stderr=True,
            expect_fail=True,
            # TODO stdin
        )
    except CommandError as e:
        # strip our own info from the exception. The original command output
        # went to stdout/err -- we just have to pass on the exit code the same way
        exc = e
        cmd_exitcode = e.code

        if expected_exit is not None and expected_exit != cmd_exitcode:
            # we failed in a different way during a rerun.  This can easily
            # happen if we try to alter a locked file
            #
            # TODO add the ability to `git reset --hard` the dataset tree on failure
            # we know that we started clean, so we could easily go back, needs gh-1424
            # to be able to do it recursively
            raise exc

    lgr.info("== Command exit (modification check follows) =====")
    return cmd_exitcode or 0, exc
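
A hedged usage sketch of the helper above (command and working directory are hypothetical):

# hypothetical invocation from within a `run`-like command
exitcode, exc = _execute_command('make test', pwd='/tmp/myds')
if exitcode != 0:
    lgr.warning("command failed with exit code %d", exitcode)
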
Example #44
def __init__(self, dataset=None, dataset_only=False):
    # store in a simple dict
    # no subclassing, because we want to be largely read-only, and implement
    # config writing separately
    self._store = {}
    self._dataset_path = dataset.path if dataset else None
    self._dataset_only = dataset_only
    # Since configs could contain sensitive information, to prevent
    # any "facilitated" leakage -- just disable logging of outputs for
    # this runner
    run_kwargs = dict(log_outputs=False)
    if dataset is not None:
        # make sure we run the git config calls in the dataset
        # to pick up the right config files
        run_kwargs['cwd'] = dataset.path
    self._runner = Runner(**run_kwargs)
    self.reload()
Example #45
from datalad.cmd import Runner
from datalad.support.protocol import DryRunProtocol
import glob
import logging
import os
import time
import json

# Original SATA interface ones
# DRIVES_PREFIX = "ata-ST4000NM0033-9ZM170_S1Z0"

# SAS 6TB
DRIVES_PREFIX = "scsi-35000c50084"

lgr = logging.getLogger("datalad.benchmarking")

run = Runner()
dryrun = Runner(protocol=DryRunProtocol())

def get_drives(prefix, even=True):
    """Return letters for corresponding drives"""
    drives = [d for d in glob.glob('/dev/disk/by-id/%s*' % prefix)
              if not '-part' in d]
    drives = sorted([
        os.path.realpath(d)[-3:]
        for d in drives
        ])
    for d in drives:
        assert(d.startswith('sd'))
    if even and len(drives) > 1:
        drives = drives[:len(drives)//2*2]
    assert(len(drives) == 1 or len(drives) % 2 == 0)
    return drives
    def __call__(sshurl, target=None, target_dir=None,
                 target_url=None, target_pushurl=None,
                 dataset=None, recursive=False,
                 existing='raise', shared=False):

        if sshurl is None:
            raise ValueError("""insufficient information for target creation
            (needs at least a dataset and a SSH URL).""")

        if target is None and (target_url is not None
                               or target_pushurl is not None):
            raise ValueError("""insufficient information for adding the target
            as a sibling (needs at least a name)""")

        # shortcut
        ds = dataset

        if ds is not None and not isinstance(ds, Dataset):
            ds = Dataset(ds)
        if ds is None:
            # try to find a dataset at or above CWD
            dspath = GitRepo.get_toppath(abspath(getpwd()))
            if dspath is None:
                raise ValueError("""No dataset found
                                 at or above {0}.""".format(getpwd()))
            ds = Dataset(dspath)
            lgr.debug("Resolved dataset for target creation: {0}".format(ds))
        assert(ds is not None and sshurl is not None)

        if not ds.is_installed():
            raise ValueError("""Dataset {0} is not installed yet.""".format(ds))
        assert(ds.repo is not None)

        # determine target parameters:
        parsed_target = urlparse(sshurl)
        host_name = parsed_target.netloc

        # TODO: Sufficient to fail on this condition?
        if not parsed_target.netloc:
            raise ValueError("Malformed URL: {0}".format(sshurl))

        if target_dir is None:
            if parsed_target.path:
                target_dir = parsed_target.path
            else:
                target_dir = '.'

        # TODO: centralize and generalize template symbol handling
        replicate_local_structure = False
        if "%NAME" not in target_dir:
            replicate_local_structure = True

        # collect datasets to use:
        datasets = dict()
        datasets[basename(ds.path)] = ds
        if recursive:
            for subds in ds.get_dataset_handles(recursive=True):
                sub_path = opj(ds.path, subds)
                # TODO: when enhancing Dataset/*Repo classes and therefore
                # adapt to moved code, make proper distinction between name and
                # path of a submodule, which are technically different. This
                # probably will become important on windows as well as whenever
                # we want to allow for moved worktrees.
                datasets[basename(ds.path) + '/' + subds] = \
                    Dataset(sub_path)

        # setup SSH Connection:
        # TODO: Make the entire setup a helper to use it when pushing via
        # publish?

        # - build control master:
        from datalad.utils import assure_dir
        not_supported_on_windows("TODO")
        from os import geteuid  # Linux specific import
        var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
        assure_dir(var_run_user_datalad)
        control_path = "%s/%s" % (var_run_user_datalad, host_name)
        control_path += ":%s" % parsed_target.port if parsed_target.port else ""

        # - start control master:
        cmd = "ssh -o ControlMaster=yes -o \"ControlPath=%s\" " \
              "-o ControlPersist=yes %s exit" % (control_path, host_name)
        lgr.debug("Try starting control master by calling:\n%s" % cmd)
        import subprocess
        proc = subprocess.Popen(cmd, shell=True)
        proc.communicate(input="\n")  # why on earth is this necessary?

        runner = Runner()
        ssh_cmd = ["ssh", "-S", control_path, host_name]

        lgr.info("Creating target datasets ...")
        for current_dataset in datasets:
            if not replicate_local_structure:
                path = target_dir.replace("%NAME",
                                          current_dataset.replace("/", "-"))
            else:
                # TODO: opj depends on local platform, not the remote one.
                # check how to deal with it. Does windows ssh server accept
                # posix paths? vice versa? Should planned SSH class provide
                # tools for this issue?
                path = normpath(opj(target_dir,
                                    relpath(datasets[current_dataset].path,
                                            start=ds.path)))

            if path != '.':
                # check if target exists
                # TODO: Is this condition valid for != '.' only?
                path_exists = True
                cmd = ssh_cmd + ["ls", path]
                try:
                    out, err = runner.run(cmd, expect_fail=True,
                                          expect_stderr=True)
                except CommandError as e:
                    if "No such file or directory" in e.stderr and \
                                    path in e.stderr:
                        path_exists = False
                    else:
                        raise  # It's an unexpected failure here

                if path_exists:
                    if existing == 'raise':
                        raise RuntimeError(
                            "Target directory %s already exists." % path)
                    elif existing == 'skip':
                        continue
                    elif existing == 'replace':
                        pass
                    else:
                        raise ValueError("Do not know how to hand existing=%s" % repr(existing))

                cmd = ssh_cmd + ["mkdir", "-p", path]
                try:
                    runner.run(cmd)
                except CommandError as e:
                    lgr.error("Remotely creating target directory failed at "
                              "%s.\nError: %s" % (path, str(e)))
                    continue

            # init git repo
            cmd = ssh_cmd + ["git", "-C", path, "init"]
            if shared:
                cmd.append("--shared=%s" % shared)
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Remotely initializing git repository failed at %s."
                          "\nError: %s\nSkipping ..." % (path, str(e)))
                continue

            # check git version on remote end:
            cmd = ssh_cmd + ["git", "version"]
            try:
                out, err = runner.run(cmd)
                git_version = out.lstrip("git version").strip()
                lgr.debug("Detected git version on server: %s" % git_version)
                if git_version < "2.4":
                    lgr.error("Git version >= 2.4 needed to configure remote."
                              " Version detected on server: %s\nSkipping ..."
                              % git_version)
                    continue

            except CommandError as e:
                lgr.warning(
                    "Failed to determine git version on remote.\n"
                    "Error: {0}\nTrying to configure anyway "
                    "...".format(e.message))

            # allow for pushing to checked out branch
            cmd = ssh_cmd + ["git", "-C", path, "config",
                             "receive.denyCurrentBranch",
                             "updateInstead"]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.warning("git config failed at remote location %s.\n"
                            "You will not be able to push to checked out "
                            "branch." % path)

            # enable post-update hook:
            cmd = ssh_cmd + ["mv", opj(path, ".git/hooks/post-update.sample"),
                             opj(path, ".git/hooks/post-update")]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to enable post update hook.\n"
                          "Error: %s" % e.message)

            # initially update server info "manually":
            cmd = ssh_cmd + ["git", "-C", path, "update-server-info"]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to update server info.\n"
                          "Error: %s" % e.message)

        # stop controlmaster (close ssh connection):
        cmd = ["ssh", "-O", "stop", "-S", control_path, host_name]
        out, err = runner.run(cmd, expect_stderr=True)

        if target:
            # add the sibling(s):
            if target_url is None:
                target_url = sshurl
            if target_pushurl is None:
                target_pushurl = sshurl
            result_adding = AddSibling()(dataset=ds,
                                         name=target,
                                         url=target_url,
                                         pushurl=target_pushurl,
                                         recursive=recursive,
                                         force=existing in {'replace'})
Example #47
def test_clone(src, tempdir):
    # Verify that all our repos are clonable
    r = Runner()
    output = r.run(["git" , "clone", src, tempdir], log_online=True)
    #status, output = getstatusoutput("git clone %(src)s %(tempdir)s" % locals())
    ok_(os.path.exists(os.path.join(tempdir, ".git")))
Example #48
class runner(SuprocBenchmarks):
    """Some rudimentary tests to see if there is no major slowdowns from Runner
    """

    def setup(self):
        self.runner = Runner()
        # older versions might not have it
        try:
            from datalad.cmd import GitRunner
            self.git_runner = GitRunner()
        except ImportError:
            pass

    def time_echo(self):
        self.runner.run("echo")

    def time_echo_gitrunner(self):
        self.git_runner.run("echo")

    # Following "track" measures computing overhead comparing to the simplest
    # os.system call on the same command without carrying for in/out

    unit = "% overhead"

    def _get_overhead(self, cmd, nrepeats=3, **run_kwargs):
        """Estimate overhead over running command via the simplest os.system
        and to not care about any output
        """
        # asv does not repeat "track" benchmarks I think, hence nrepeats
        overheads = []
        for _ in range(nrepeats):
            t0 = time()
            os.system(cmd + " >/dev/null 2>&1")
            t1 = time()
            self.runner.run(cmd, **run_kwargs)
            t2 = time()
            overhead = 100 * ((t2 - t1) / (t1 - t0) - 1.0)
            # print("O :", t1 - t0, t2 - t0, overhead)
            overheads.append(overhead)
        overhead = round(sum(overheads) / len(overheads), 2)
        #overhead = round(min(overheads), 2)
        return overhead

    def track_overhead_echo(self):
        return self._get_overhead("echo")

    # 100ms chosen below as providing some sensible stability for me.
    # at 10ms -- too much variability
    def track_overhead_100ms(self):
        return self._get_overhead("sleep 0.1")

    def track_overhead_heavyout(self):
        # run a busy loop for 100ms outputting as much as it can
        return self._get_overhead(heavyout_cmd)

    def track_overhead_heavyout_online_through(self):
        return self._get_overhead(heavyout_cmd,
                                  log_stderr='offline',  # needed, otherwise we would get stuck
                                  log_online=True)

    def track_overhead_heavyout_online_process(self):
        return self._get_overhead(heavyout_cmd,
                                  log_stdout=lambda s: '',
                                  log_stderr='offline',  # needed, otherwise we would get stuck
                                  log_online=True)

    # # Probably not really interesting, and good lord wobbles around 0
    # def track_overhead_heavyout_offline(self):
    #     return self._get_overhead(heavyout_cmd,
    #                               log_stdout='offline',
    #                               log_stderr='offline')

    # TODO: track the one with in/out, i.e. for those BatchedProcesses
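
To make the tracked metric concrete: if the bare os.system call takes t1 - t0 = 0.10 s and the Runner call takes t2 - t1 = 0.12 s, _get_overhead reports 100 * (0.12 / 0.10 - 1.0) = 20.0, i.e. a 20% overhead, averaged over nrepeats repetitions.
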
Example #49
    def __call__(dataset=None, name=None, url=None,
                 pushurl=None, recursive=False, force=False):

        # TODO: Detect malformed URL and fail?

        if name is None or (url is None and pushurl is None):
            raise ValueError("""insufficient information to add a sibling
                (needs at least a dataset, a name and an URL).""")
        if url is None:
            url = pushurl

        # shortcut
        ds = dataset

        if ds is not None and not isinstance(ds, Dataset):
            ds = Dataset(ds)
        if ds is None:
            # try to find a dataset at or above CWD
            dspath = GitRepo.get_toppath(abspath(getpwd()))
            if dspath is None:
                raise ValueError(
                        "No dataset found at or above {0}.".format(getpwd()))
            ds = Dataset(dspath)
            lgr.debug("Resolved dataset for target creation: {0}".format(ds))

        assert(ds is not None and name is not None and url is not None)

        if not ds.is_installed():
            raise ValueError("Dataset {0} is not installed yet.".format(ds))
        assert(ds.repo is not None)

        ds_basename = basename(ds.path)
        repos = {
            ds_basename: {'repo': ds.repo}
        }
        if recursive:
            for subds in ds.get_dataset_handles(recursive=True):
                sub_path = opj(ds.path, subds)
                repos[ds_basename + '/' + subds] = {
#                repos[subds] = {
                    'repo': GitRepo(sub_path, create=False)
                }

        # Note: This is copied from create_publication_target_sshwebserver
        # as it is the same logic as for its target_dir.
        # TODO: centralize and generalize template symbol handling
        # TODO: Check pushurl for template symbols too. Probably raise if only
        #       one of them uses such symbols

        replicate_local_structure = False
        if "%NAME" not in url:
            replicate_local_structure = True

        for repo in repos:
            if not replicate_local_structure:
                repos[repo]['url'] = url.replace("%NAME",
                                                 repo.replace("/", "-"))
                if pushurl:
                    repos[repo]['pushurl'] = pushurl.replace("%NAME",
                                                             repo.replace("/",
                                                                          "-"))
            else:
                repos[repo]['url'] = url
                if pushurl:
                    repos[repo]['pushurl'] = pushurl

                if repo != ds_basename:
                    repos[repo]['url'] = _urljoin(repos[repo]['url'], repo[len(ds_basename)+1:])
                    if pushurl:
                        repos[repo]['pushurl'] = _urljoin(repos[repo]['pushurl'], repo[len(ds_basename)+1:])

        # collect existing remotes:
        already_existing = list()
        conflicting = list()
        for repo in repos:
            if name in repos[repo]['repo'].git_get_remotes():
                already_existing.append(repo)
                lgr.debug("""Remote '{0}' already exists
                          in '{1}'.""".format(name, repo))

                existing_url = repos[repo]['repo'].git_get_remote_url(name)
                existing_pushurl = \
                    repos[repo]['repo'].git_get_remote_url(name, push=True)

                if repos[repo]['url'].rstrip('/') != existing_url.rstrip('/') \
                        or (pushurl and existing_pushurl and
                            repos[repo]['pushurl'].rstrip('/') !=
                                    existing_pushurl.rstrip('/')) \
                        or (pushurl and not existing_pushurl):
                    conflicting.append(repo)

        if not force and conflicting:
            raise RuntimeError("Sibling '{0}' already exists with conflicting"
                               " URL for {1} dataset(s). {2}".format(
                                   name, len(conflicting), conflicting))

        runner = Runner()
        successfully_added = list()
        for repo in repos:
            if repo in already_existing:
                if repo not in conflicting:
                    lgr.debug("Skipping {0}. Nothing to do.".format(repo))
                    continue
                # rewrite url
                cmd = ["git", "remote", "set-url", name, repos[repo]['url']]
                runner.run(cmd, cwd=repos[repo]['repo'].path)
            else:
                # add the remote
                cmd = ["git", "remote", "add", name, repos[repo]['url']]
                runner.run(cmd, cwd=repos[repo]['repo'].path)
            if pushurl:
                cmd = ["git", "remote", "set-url", "--push", name,
                       repos[repo]['pushurl']]
                runner.run(cmd, cwd=repos[repo]['repo'].path)
            successfully_added.append(repo)

        return successfully_added
Example #50
def populate(self):
    super(NestedDataset, self).populate()
    ds = SubmoduleDataset()
    ds.create()
    from datalad.cmd import Runner
    runner = Runner()
    kw = dict(expect_stderr=True)
    runner.run(['git', 'submodule', 'add', ds.url, 'subdataset'],
               cwd=self.path, **kw)
    runner.run(['git', 'submodule', 'add', ds.url, 'subsubdataset'],
               cwd=opj(self.path, 'subdataset'), **kw)
    runner.run(['git', 'commit', '-m', 'Added subdataset.'],
               cwd=opj(self.path, 'subdataset'), **kw)
    runner.run(['git', 'commit', '-a', '-m', 'Added subdatasets.'],
               cwd=self.path, **kw)
    runner.run(['git', 'submodule', 'update', '--init', '--recursive'],
               cwd=self.path, **kw)
    # init all annexes
    for s in ('', 'subdataset', opj('subdataset', 'subsubdataset')):
        runner.run(['git', 'annex', 'init'],
                   cwd=opj(self.path, s), expect_stderr=True)
Example #51
class ConfigManager(object):
    """Thin wrapper around `git-config` with support for a dataset configuration.

    The general idea is to have an object that is primarily used to read/query
    configuration options.  Upon creation, the current configuration is read via one
    (or max two, in the case of the presence of dataset-specific configuration)
    calls to `git config`.  If this class is initialized with a Dataset
    instance, it supports reading and writing configuration from
    ``.datalad/config`` inside a dataset too. This file is committed to Git and
    hence useful to ship certain configuration items with a dataset.

    The API aims to provide the most significant read-access API of a
    dictionary, the Python ConfigParser, and GitPython's config parser
    implementations.

    This class is presently not capable of efficiently writing multiple
    configurations items at once.  Instead, each modification results in a
    dedicated call to `git config`. This author thinks this is OK, as he
    cannot think of a situation where a large number of items need to be
    written during normal operation. If such need arises, various solutions are
    possible (via GitPython, or an independent writer).

    Any DATALAD_* environment variable is also presented as a configuration
    item. Settings read from environment variables are not stored in any of the
    configuration files, but are read dynamically from the environment at each
    `reload()` call. Their values take precedence over any specification in
    configuration files.

    Parameters
    ----------
    dataset : Dataset, optional
      If provided, all `git config` calls are executed in this dataset's
      directory. Moreover, any modifications are, by default, directed to
      this dataset's configuration file (which will be created on demand)
    dataset_only : bool
      If True, configuration items are only read from a datasets persistent
      configuration file, if any present (the one in ``.datalad/config``, not
      ``.git/config``).
    """
    def __init__(self, dataset=None, dataset_only=False):
        # store in a simple dict
        # no subclassing, because we want to be largely read-only, and implement
        # config writing separately
        self._store = {}
        self._dataset_path = dataset.path if dataset else None
        self._dataset_only = dataset_only
        # Since configs could contain sensitive information, to prevent
        # any "facilitated" leakage -- just disable logging of outputs for
        # this runner
        run_kwargs = dict(log_outputs=False)
        if dataset is not None:
            # make sure we run the git config calls in the dataset
            # to pick up the right config files
            run_kwargs['cwd'] = dataset.path
        self._runner = Runner(**run_kwargs)
        self.reload()

    def reload(self):
        """Reload all configuration items from the configured sources"""
        self._store = {}
        # 2-step strategy:
        #   - load datalad dataset config from dataset
        #   - load git config from all supported by git sources
        # in doing so we always stay compatible with where Git gets its
        # config from, but also allow to override persistent information
        # from dataset locally or globally
        if self._dataset_path:
            # now any dataset config
            dscfg_fname = opj(self._dataset_path, '.datalad', 'config')
            if exists(dscfg_fname):
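                # with `-z`, `git config` dumps each entry as a
                # "key\nvalue" record terminated by NUL, which is
                # presumably the format `_parse_gitconfig_dump` expects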
                stdout, stderr = self._run(['-z', '-l', '--file', dscfg_fname],
                                           log_stderr=True)
                # overwrite existing value, do not amend to get multi-line
                # values
                self._store = _parse_gitconfig_dump(
                    stdout, self._store, replace=False)

        if not self._dataset_only:
            stdout, stderr = self._run(['-z', '-l'], log_stderr=True)
            self._store = _parse_gitconfig_dump(
                stdout, self._store, replace=True)

            # override with environment variables
            self._store = _parse_env(self._store)

    @_where_reload
    def obtain(self, var, default=None, dialog_type=None, valtype=None,
               store=False, where=None, reload=True, **kwargs):
        """
        Convenience method to obtain settings interactively, if needed

        A UI will be used to ask for user input in interactive sessions.
        Questions to ask, and additional explanations can be passed directly
        as arguments, or retrieved from a list of pre-configured items.

        Additionally, this method allows for type conversion and storage
        of obtained settings. Both aspects can also be pre-configured.

        Parameters
        ----------
        var : str
          Variable name including any section like `git config` expects them,
          e.g. 'core.editor'
        default : any type
          In interactive sessions and if `store` is True, this default value
          will be presented to the user for confirmation (or modification).
          In all other cases, this value will be silently assigned unless
          there is an existing configuration setting.
        dialog_type : {'question', 'yesno', None}
          Which dialog type to use in interactive sessions. If `None`,
          pre-configured UI options are used.
        store : bool
          Whether to store the obtained value (or default)
        %s
        `**kwargs`
          Additional arguments for the UI function call, such as a question
          `text`.
        """
        # do a local import, as this module is imported prominently, and the
        # type conversion code could theoretically import all kinds of weird
        # things
        from datalad.interface.common_cfg import definitions as cfg_defs
        # fetch what we know about this variable
        cdef = cfg_defs.get(var, {})
        # type conversion setup
        if valtype is None and 'type' in cdef:
            valtype = cdef['type']
        if valtype is None:
            valtype = lambda x: x

        # any default?
        if default is None and 'default' in cdef:
            default = cdef['default']

        _value = None
        if var in self:
            # nothing needs to be obtained, it is all here already
            _value = self[var]
        elif store is False and default is not None:
            # nothing will be stored, and we have a default -> no user
            # confirmation needed.
            # we cannot use logging, because we want to use the config to
            # configure the logging
            #lgr.debug('using default {} for config setting {}'.format(default, var))
            _value = default

        if _value is not None:
            # we got everything we need and can exit early
            try:
                return valtype(_value)
            except Exception as e:
                raise ValueError(
                    "value '{}' of existing configuration for '{}' cannot be "
                    "converted to the desired type '{}' ({})".format(
                        _value, var, valtype, exc_str(e)))

        # now we need to try to obtain something from the user
        from datalad.ui import ui

        # configure UI
        dialog_opts = kwargs
        if dialog_type is None:  # no override
            # check for common knowledge on how to obtain a value
            if 'ui' in cdef:
                dialog_type = cdef['ui'][0]
                # pull standard dialog settings
                dialog_opts = cdef['ui'][1]
                # update with input
                dialog_opts.update(kwargs)

        if (not ui.is_interactive or dialog_type is None) and default is None:
            raise RuntimeError(
                "cannot obtain value for configuration item '{}', "
                "not preconfigured, no default, no UI available".format(var))

        if dialog_type is not None and not hasattr(ui, dialog_type):
            raise ValueError("UI '{}' does not support dialog type '{}'".format(
                ui, dialog_type))

        # configure storage destination, if needed
        if store:
            if where is None and 'destination' in cdef:
                where = cdef['destination']
            if where is None:
                raise ValueError(
                    "request to store configuration item '{}', but no "
                    "storage destination specified".format(var))

        if dialog_type is None:
            # no dialog is preconfigured; the guard above ensured that a
            # default exists, so fall back on it without asking
            _value = default
        else:
            # obtain via UI
            dialog = getattr(ui, dialog_type)
            _value = dialog(default=default, **dialog_opts)

        if _value is None:
            # we got nothing
            if default is None:
                raise RuntimeError(
                    "could not obtain value for configuration item '{}', "
                    "not preconfigured, no default".format(var))
            # XXX maybe we should return default here, even it was returned
            # from the UI -- if that is even possible

        # execute type conversion before storing to check that we got
        # something that looks like what we want
        try:
            value = valtype(_value)
        except Exception as e:
            raise ValueError(
                "cannot convert user input `{}` to desired type ({})".format(
                    _value, exc_str(e)))
            # XXX we could consider "looping" until we have a value of proper
            # type in case of a user typo...

        if store:
            # store the value as it was before any conversion; it needs to be
            # a string anyway, and the default could have come in as some
            # other type
            self.add(var, '{}'.format(_value), where=where, reload=reload)
        return value
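
    # An illustrative sketch of `obtain` in action (not part of the class;
    # 'annex.numcopies' is merely an example variable, `ds` a Dataset):
    #
    #   cfg = ConfigManager(dataset=ds)
    #   ncopies = cfg.obtain('annex.numcopies', default=1, valtype=int,
    #                        store=True, where='dataset')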

    #
    # Compatibility with dict API
    #
    def __len__(self):
        return len(self._store)

    def __getitem__(self, key):
        return self._store.__getitem__(key)

    def __contains__(self, key):
        return self._store.__contains__(key)

    def keys(self):
        """Returns list of configuration item names"""
        return self._store.keys()

    def get(self, key, default=None):
        """D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None."""
        return self._store.get(key, default)

    #
    # Compatibility with ConfigParser API
    #
    def sections(self):
        """Returns a list of the sections available"""
        return list(set([cfg_section_regex.match(k).group(1) for k in self._store]))

    def options(self, section):
        """Returns a list of options available in the specified section."""
        opts = []
        for k in self._store:
            sec, opt = cfg_sectionoption_regex.match(k).groups()
            if sec == section:
                opts.append(opt)
        return opts

    def has_section(self, section):
        """Indicates whether a section is present in the configuration"""
        # match on the full section prefix, so that e.g. 'core' does not
        # accidentally match a 'coreutils.*' item
        prefix = section + '.'
        for k in self._store:
            if k.startswith(prefix):
                return True
        return False

    def has_option(self, section, option):
        """If the given section exists, and contains the given option"""
        for k in self._store:
            sec, opt = cfg_sectionoption_regex.match(k).groups()
            if sec == section and opt == option:
                return True
        return False

    def getint(self, section, option):
        """A convenience method which coerces the option value to an integer"""
        return int(self.get_value(section, option))

    def getbool(self, section, option, default=None):
        """A convenience method which coerces the option value to a bool

        Values "on", "yes", "true" and any int!=0 are considered True
        Values which evaluate to bool False, "off", "no", "false" are considered
        False
        TypeError is raised for other values.
        """
        val = self.get_value(section, option, default=default)
        return anything2bool(val)
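
    # e.g. (per the docstring above) getbool('annex', 'autoupgrade') would
    # map "yes"/"on"/"true"/non-zero ints to True -- 'annex.autoupgrade'
    # being just an illustrative option name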

    def getfloat(self, section, option):
        """A convenience method which coerces the option value to a float"""
        return float(self.get_value(section, option))

    # this is a hybrid of ConfigParser and dict API
    def items(self, section=None):
        """Return a list of (name, value) pairs for each option

        Optionally limited to a given section.
        """
        if section is None:
            return self._store.items()
        return [(k, v) for k, v in self._store.items()
                if cfg_section_regex.match(k).group(1) == section]
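
    # Illustrative, assuming a typical git config in the store:
    #   sections()      -> ['core', 'user', ...]
    #   options('core') -> ['bare', 'filemode', ...]
    #   items('user')   -> [('user.name', 'Jane Doe'), ...]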

    #
    # Compatibility with GitPython's ConfigParser
    #
    def get_value(self, section, option, default=None):
        """Like `get()`, but with an optional default value

        If the default is not None, the given default value will be returned
        if the option does not exist. This behavior imitates GitPython's
        config parser.
        """
        try:
            return self['.'.join((section, option))]
        except KeyError as e:
            # this strange dance is needed because gitpython does it this way
            if default is not None:
                return default
            else:
                raise e

    #
    # Modify configuration (proxy respective git-config call)
    #
    @_where_reload
    def _run(self, args, where=None, reload=False, **kwargs):
        """Centralized helper to run "git config" calls

        Parameters
        ----------
        args : list
          Arguments to pass for git config
        %s
        **kwargs
          Keywords arguments for Runner's call
        """
        if where:
            args = self._get_location_args(where) + args
        out = self._runner.run(['git', 'config'] + args, **kwargs)
        if reload:
            self.reload()
        return out

    def _get_location_args(self, where, args=None):
        if args is None:
            args = []
        cfg_labels = ('dataset', 'local', 'global')
        if where not in cfg_labels:
            raise ValueError(
                "unknown configuration label '{}' (not in {})".format(
                    where, cfg_labels))
        if where == 'dataset':
            if not self._dataset_path:
                raise ValueError(
                    'ConfigManager cannot store configuration to a dataset, '
                    'none was specified')
            # create an empty config file if none exists, `git config` will
            # fail otherwise
            dscfg_dirname = opj(self._dataset_path, '.datalad')
            dscfg_fname = opj(dscfg_dirname, 'config')
            if not exists(dscfg_dirname):
                os.makedirs(dscfg_dirname)
            if not exists(dscfg_fname):
                open(dscfg_fname, 'w').close()
            args.extend(['--file', dscfg_fname])
        elif where == 'global':
            args.append('--global')
        elif where == 'local':
            args.append('--local')
        return args
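
    # Resulting `git config` location arguments, for reference:
    #   where='dataset' -> ['--file', <dataset>/.datalad/config]
    #   where='global'  -> ['--global']
    #   where='local'   -> ['--local']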

    @_where_reload
    def add(self, var, value, where='dataset', reload=True):
        """Add a configuration variable and value

        Parameters
        ----------
        var : str
          Variable name including any section like `git config` expects them, e.g.
          'core.editor'
        value : str
          Variable value
        %s"""
        self._run(['--add', var, value], where=where, reload=reload, log_stderr=True)

    @_where_reload
    def set(self, var, value, where='dataset', reload=True, force=False):
        """Set a variable to a value.

        Unlike `add`, this replaces the value of `var` if there is one
        already.

        Parameters
        ----------
        var : str
          Variable name including any section like `git config` expects them, e.g.
          'core.editor'
        value : str
          Variable value
        force : bool
          If set, replaces all occurrences of `var` with a single one carrying
          the given `value`. Otherwise an error is raised if multiple entries
          for `var` already exist.
        %s"""
        from datalad.support.gitrepo import to_options

        self._run(to_options(replace_all=force) + [var, value],
                  where=where, reload=reload, log_stderr=True)

    @_where_reload
    def rename_section(self, old, new, where='dataset', reload=True):
        """Rename a configuration section

        Parameters
        ----------
        old : str
          Name of the section to rename.
        new : str
          Name of the section to rename to.
        %s"""
        self._run(['--rename-section', old, new], where=where, reload=reload)

    @_where_reload
    def remove_section(self, sec, where='dataset', reload=True):
        """Rename a configuration section

        Parameters
        ----------
        sec : str
          Name of the section to remove.
        %s"""
        self._run(['--remove-section', sec], where=where, reload=reload)

    @_where_reload
    def unset(self, var, where='dataset', reload=True):
        """Remove all occurrences of a variable

        Parameters
        ----------
        var : str
          Name of the variable to remove
        %s"""
        # use unset all as it is simpler for now
        self._run(['--unset-all', var], where=where, reload=reload)
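
A minimal end-to-end sketch of the modification API above (hypothetical key
and value; `ds` stands in for a Dataset instance, which is not defined in
this excerpt):

    cfg = ConfigManager(dataset=ds)
    # written to <dataset>/.datalad/config, then the store is reloaded
    cfg.set('datalad.example.key', 'some-value', where='dataset')
    assert cfg.get('datalad.example.key') == 'some-value'
    # remove all occurrences again
    cfg.unset('datalad.example.key', where='dataset')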