コード例 #1
0
ファイル: test_update.py プロジェクト: nicholsn/datalad
def test_newthings_coming_down(originpath, destpath):
    """Exercise `update` on a clone while its origin changes underneath it.

    The origin at `originpath` starts as a plain git repository, is turned
    into an annex after it was cloned to `destpath`, receives tags, and
    finally has its annex removed again. After each change the clone is
    updated and inspected.

    NOTE(review): `originpath` and `destpath` are presumably temporary
    directories injected by decorators that are not visible here -- confirm.
    """
    # start with a plain git origin holding a single saved file
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # the clone is expected to be a plain git repo with an 'origin' remote
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert (knows_annex(ds.path))
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    # content is not present locally until it is explicitly requested
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    # re-instantiate as a plain git repo and verify the annex is gone
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be enhanced to a
    # graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # local branches must be unaffected by the update
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/' + DEFAULT_BRANCH],
        ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
コード例 #2
0
ファイル: test_utils.py プロジェクト: seldamat/datalad
def test_knows_annex(here, there):
    """Check `knows_annex` for a plain git repo, an annex, and a clone of it.

    NOTE(review): `here` and `there` are presumably temporary directories
    supplied by decorators not visible in this snippet -- confirm.
    """
    from datalad.support.gitrepo import GitRepo
    from datalad.support.annexrepo import AnnexRepo
    # a freshly created plain git repository must not report an annex
    GitRepo(path=here, create=True)
    assert_false(knows_annex(here))
    # creating an annex at the same location makes it known
    AnnexRepo(path=here, create=True)
    assert_true(knows_annex(here))
    # a plain git clone of the annex is expected to report an annex as well
    GitRepo.clone(path=there, url=here, create=True)
    assert_true(knows_annex(there))
コード例 #3
0
ファイル: update.py プロジェクト: debanjum/datalad
def _update_repo(repo, remote, merge, fetch_all):
    """Fetch updates for `repo` and optionally merge them.

    Parameters
    ----------
    repo
      Repository object; `fetch`, `get_tracking_branch`,
      `get_active_branch` and `_git_custom_command` are called on it, and
      its `path` is handed to `knows_annex`.
    remote
      Name of the remote to update from; a false value means "not given".
    merge
      If true, run ``git pull`` after fetching and, if an annex is known
      afterwards, ``git annex merge``.
    fetch_all
      Forwarded to ``repo.fetch`` as ``all_``.
    """
    repo.fetch(remote=remote, all_=fetch_all)

    # TODO: This isn't correct. `fetch_all` fetches all remotes.
    # Apparently, we currently fetch an entire remote anyway. Is this
    # what we want? Do we want to specify a refspec instead?
    # yoh: we should leave it to git and its configuration.
    # So imho we should just extract to fetch everything git would fetch
    if knows_annex(repo.path) and not fetch_all:
        # The git-annex branch should come from the remote we update from;
        # without an explicit remote fall back to the tracking branch's one.
        annex_source = remote
        if not annex_source:
            annex_source, _ = repo.get_tracking_branch()
        if annex_source:
            # TODO: what does failing here look like?
            repo.fetch(remote=annex_source)

    if not merge:
        return

    lgr.info("Applying changes from tracking branch...")
    # TODO: Adapt.
    # TODO: Rethink default remote/tracking branch. See above.
    # We need a "tracking remote" but custom refspec to fetch from
    # that remote
    pull_cmd = ["git", "pull"]
    if remote:
        # A branch is needed when pulling from a non-default remote.
        # => TODO: use default remote/tracking branch to compare
        # => TODO: allow for passing a branch (or more general refspec?)
        # For now, just use the same name as the active branch.
        pull_cmd += [remote, repo.get_active_branch()]

    pull_out, _ = repo._git_custom_command('', pull_cmd)
    lgr.info(pull_out)
    if knows_annex(repo.path):
        lgr.info("Updating annex ...")
        annex_out, _ = repo._git_custom_command(
            '', ["git", "annex", "merge"])
        lgr.info(annex_out)
コード例 #4
0
ファイル: update.py プロジェクト: debanjum/datalad
def _update_repo(repo, remote, merge, fetch_all):
    """Bring `repo` up to date with its remote(s).

    First fetches via ``repo.fetch(remote=remote, all_=fetch_all)``. If an
    annex is known at ``repo.path`` and `fetch_all` is false, an additional
    fetch is issued from `remote` or, when no remote was given, from the
    remote of the tracking branch (if there is one). With `merge` set,
    ``git pull`` is run and, for a known annex, ``git annex merge``.
    """
    # fetch remote(s)
    repo.fetch(remote=remote, all_=fetch_all)

    # TODO: This isn't correct. `fetch_all` fetches all remotes.
    # Apparently, we currently fetch an entire remote anyway. Is this
    # what we want? Do we want to specify a refspec instead?
    # yoh: we should leave it to git and its configuration.
    # So imho we should just extract to fetch everything git would fetch
    if knows_annex(repo.path) and not fetch_all:
        if remote:
            # updating from a specific remote: take git-annex from there too
            fetch_from = remote
        else:
            # no remote given: look at the tracking branch's remote
            fetch_from, _ = repo.get_tracking_branch()
        if fetch_from:
            # TODO: what does failing here look like?
            repo.fetch(remote=fetch_from)

    if merge:
        lgr.info("Applying changes from tracking branch...")
        # TODO: Adapt.
        # TODO: Rethink default remote/tracking branch. See above.
        # We need a "tracking remote" but custom refspec to fetch from
        # that remote
        #
        # With an explicit remote a branch has to be named as well; for now
        # the active branch's name is reused.
        # => TODO: use default remote/tracking branch to compare
        # => TODO: allow for passing a branch (or more general refspec?)
        pull_args = [remote, repo.get_active_branch()] if remote else []
        out, _ = repo._git_custom_command('', ["git", "pull"] + pull_args)
        lgr.info(out)
        if knows_annex(repo.path):
            lgr.info("Updating annex ...")
            out, _ = repo._git_custom_command('', ["git", "annex", "merge"])
            lgr.info(out)
コード例 #5
0
ファイル: test_update.py プロジェクト: datalad/datalad
def test_newthings_coming_down(originpath, destpath):
    """Exercise `update` on a clone while its origin changes underneath it.

    The origin at `originpath` starts as a plain git repository, becomes an
    annex after it was cloned to `destpath`, receives tags, and finally has
    its annex removed again. After each change the clone is updated and
    inspected.

    NOTE(review): `originpath` and `destpath` are presumably temporary
    directories injected by decorators that are not visible here -- confirm.
    """
    # start with a plain git origin holding a single saved file
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # the clone is expected to be a plain git repo with an 'origin' remote
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert(knows_annex(ds.path))
    # no branches appeared
    eq_(ds.repo.get_branches(), ['master'])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    # content is not present locally until it is explicitly requested
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin._git_custom_command([], ['git', 'config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin._git_custom_command([], ['git', 'branch', '-D', 'git-annex'])
    # re-instantiate as a plain git repo and verify the annex is gone
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be enhanced to a
    # graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # local branches must be unaffected by the update
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/master'], ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
コード例 #6
0
ファイル: utils.py プロジェクト: debanjum/datalad
def _handle_possible_annex_dataset(dataset, reckless):
    """Initialize an annex in `dataset` if traces of one are detected.

    Does nothing unless ``knows_annex(dataset.path)`` is true. With a true
    `reckless`, ``annex.hardlink`` is set to ``'true'`` in the local
    configuration before initialization and ``untrust here`` is issued
    afterwards.
    """
    ds_path = dataset.path
    if not knows_annex(ds_path):
        # no traces of a remote annex: nothing to initialize
        return

    if reckless:
        lgr.debug(
            "Instruct annex to hardlink content in %s from local "
            "sources, if possible (reckless)", ds_path)
        dataset.config.add(
            'annex.hardlink', 'true', where='local', reload=True)

    lgr.debug("Initializing annex repo at %s", ds_path)
    annex = AnnexRepo(ds_path, init=True)
    if reckless:
        annex._run_annex_command('untrust', annex_options=['here'])
コード例 #7
0
def _handle_possible_annex_dataset(dataset, reckless):
    """Annex-init `dataset` when it knows an annex; honor `reckless`.

    Returns without doing anything if ``knows_annex(dataset.path)`` is
    false. Otherwise an `AnnexRepo` is instantiated with ``init=True``. If
    `reckless` is true, ``annex.hardlink`` is configured locally beforehand
    and the ``untrust`` annex command is run for ``here`` afterwards.
    """
    # in any case check whether we need to annex-init the installed thing
    if not knows_annex(dataset.path):
        return

    if reckless:
        lgr.debug(
            "Instruct annex to hardlink content in %s from local "
            "sources, if possible (reckless)", dataset.path)
        dataset.config.add(
            'annex.hardlink', 'true', where='local', reload=True)

    # init annex, since traces of a remote annex were detected
    lgr.debug("Initializing annex repo at %s", dataset.path)
    annex_repo = AnnexRepo(dataset.path, init=True)
    if reckless:
        annex_repo._run_annex_command('untrust', annex_options=['here'])
コード例 #8
0
 def _get_new_vcs(ds, source, vcs):
     """Create the repository for `ds` and return it.

     Without a `source` an annex repository is created at ``ds.path``. With
     a `source`, a plain git repository is created from it first; if that
     shows traces of an annex, an initialized `AnnexRepo` is returned
     instead. The incoming value of `vcs` is never read.
     """
     if source is None:
         # always come with annex when created from scratch
         lgr.info("Creating a new annex repo at %s", ds.path)
         return AnnexRepo(ds.path, url=source, create=True)

     # when obtained from remote, try with plain Git
     lgr.info("Creating a new git repo at %s", ds.path)
     git_repo = GitRepo(ds.path, url=source, create=True)
     if not knows_annex(ds.path):
         lgr.debug("New repository clone has no traces of an annex")
         return git_repo

     # init annex when traces of a remote annex can be detected
     lgr.info("Initializing annex repo at %s", ds.path)
     return AnnexRepo(ds.path, init=True)
コード例 #9
0
ファイル: utils.py プロジェクト: datalad/datalad
def _handle_possible_annex_dataset(dataset, reckless, description=None):
    """Annex-init `dataset` if it "knows annex" and tune it up.

    Nothing happens unless ``knows_annex(dataset.path)`` is true. Otherwise
    the annex is initialized, `description` (if given) is applied via
    ``repo._init``, and a true `reckless` enables hardlinking and untrusts
    ``here``. Finally, an info message is logged for every named special
    remote that is not among the repository's remotes.
    """
    if not knows_annex(dataset.path):
        # not for us
        return

    if reckless:
        lgr.debug(
            "Instruct annex to hardlink content in %s from local "
            "sources, if possible (reckless)", dataset.path)
        dataset.config.add(
            'annex.hardlink', 'true', where='local', reload=True)

    lgr.debug("Initializing annex repo at %s", dataset.path)
    # XXX convoluted: `init=True` initializes, but a description can only
    # be set through the constructor together with `create=True`, so it is
    # applied manually below (see #1403)
    repo = AnnexRepo(dataset.path, init=True)
    if description:
        repo._init(description=description)
    if reckless:
        repo._run_annex_command('untrust', annex_options=['here'])

    # tell the user about special remotes that are present but were not
    # auto-enabled
    remote_names = repo.get_remotes(
        with_urls_only=False,
        exclude_special_remotes=False)
    for sr_config in repo.get_special_remotes().values():
        sr_name = sr_config.get('name', None)
        if not sr_name or sr_name in remote_names:
            # unnamed, or listed among the remotes and hence enabled
            continue
        lgr.info(
            'access to dataset sibling "%s" not auto-enabled, enable with:\n'
            '\t\tdatalad siblings -d "%s" enable -s %s',
            sr_name,
            dataset.path,
            sr_name)
コード例 #10
0
ファイル: utils.py プロジェクト: shots47s/datalad
def _handle_possible_annex_dataset(dataset, reckless, description=None):
    """Initialize and adjust the annex of `dataset`, if it has one.

    Returns immediately when ``knows_annex(dataset.path)`` is false.
    Otherwise: optionally configure ``annex.hardlink`` (`reckless`),
    instantiate `AnnexRepo` with ``init=True``, optionally set
    `description`, optionally untrust ``here`` (`reckless`), and log an
    info message for each named special remote missing from the remotes.
    """
    # in any case check whether we need to annex-init the installed thing
    if not knows_annex(dataset.path):
        return

    if reckless:
        lgr.debug(
            "Instruct annex to hardlink content in %s from local "
            "sources, if possible (reckless)", dataset.path)
        dataset.config.add('annex.hardlink', 'true',
                           where='local', reload=True)

    lgr.debug("Initializing annex repo at %s", dataset.path)
    # XXX this is rather convoluted, init does init, but cannot
    # set a description without `create=True`; so do manually, see #1403
    annex = AnnexRepo(dataset.path, init=True)
    if description:
        annex._init(description=description)
    if reckless:
        annex._run_annex_command('untrust', annex_options=['here'])

    # go through the special remotes and report those that are present
    # but were not auto-enabled
    enabled = annex.get_remotes(with_urls_only=False,
                                exclude_special_remotes=False)
    special_names = [
        cfg.get('name', None)
        for cfg in annex.get_special_remotes().values()
    ]
    for sr_name in special_names:
        # not being listed among the remotes means it wasn't enabled
        if sr_name and sr_name not in enabled:
            lgr.info(
                'access to dataset sibling "%s" not auto-enabled, '
                'enable with:\n\t\tdatalad siblings -d "%s" enable -s %s',
                sr_name, dataset.path, sr_name)
コード例 #11
0
ファイル: utils.py プロジェクト: yarikoptic/datalad
def _handle_possible_annex_dataset(dataset, reckless, description=None):
    """Annex-init `dataset` if it "knows annex", set reckless mode etc.

    Does nothing when ``knows_annex(dataset.path)`` is false. Otherwise
    the annex is initialized; `description`, if given, is set through
    ``repo._init``; a true `reckless` configures ``annex.hardlink`` locally
    before and untrusts ``here`` after initialization.
    """
    if not knows_annex(dataset.path):
        return

    if reckless:
        lgr.debug(
            "Instruct annex to hardlink content in %s from local "
            "sources, if possible (reckless)", dataset.path)
        dataset.config.add(
            'annex.hardlink', 'true', where='local', reload=True)

    lgr.debug("Initializing annex repo at %s", dataset.path)
    # XXX convoluted: `init=True` does init, but a description cannot be
    # set that way without `create=True`; apply it manually (see #1403)
    annex = AnnexRepo(dataset.path, init=True)
    if description:
        annex._init(description=description)
    if reckless:
        annex._run_annex_command('untrust', annex_options=['here'])
コード例 #12
0
def postclonecfg_annexdataset(ds, reckless, description=None):
    """If ds "knows annex" -- annex init it, set into reckless etc

    Provides additional tune up to a possibly an annex repo, e.g.
    "enables" reckless mode, sets up description, and informs about
    special remotes that have no matching registered remote.

    This is a generator function: nothing is executed until it is iterated.
    If ``knows_annex(ds.path)`` is false, it finishes without yielding and
    without doing anything else.

    Parameters
    ----------
    ds
      Dataset that was just cloned; its `path`, `config` and `repo` are used.
    reckless
      Reckless mode. Values handled here: ``'auto'`` (hardlink + untrust
      ``here``), anything starting with ``'shared-'`` (untrust ``here``),
      and ``'ephemeral'`` (ignore origin's annex, declare ``here`` dead and,
      if possible, symlink ``.git/annex`` to origin's annex).
    description
      If true-ish, passed on to ``repo._init`` as annex description.

    Yields
    ------
    Whatever ``configure_origins(ds, ds)`` yields.
    """
    # in any case check whether we need to annex-init the installed thing:
    if not knows_annex(ds.path):
        # not for us
        return

    # init annex when traces of a remote annex can be detected
    if reckless == 'auto':
        lgr.debug(
            "Instruct annex to hardlink content in %s from local "
            "sources, if possible (reckless)", ds.path)
        ds.config.set(
            'annex.hardlink', 'true', where='local', reload=True)

    lgr.debug("Initializing annex repo at %s", ds.path)
    # Note, that we cannot enforce annex-init via AnnexRepo().
    # If such an instance already exists, its __init__ will not be executed.
    # Therefore do quick test once we have an object and decide whether to call
    # its _init().
    #
    # Additionally, call init if we need to add a description (see #1403),
    # since AnnexRepo.__init__ can only do it with create=True
    repo = AnnexRepo(ds.path, init=True)
    if not repo.is_initialized() or description:
        repo._init(description=description)
    if reckless == 'auto' or (reckless and reckless.startswith('shared-')):
        repo.call_annex(['untrust', 'here'])

    elif reckless == 'ephemeral':
        # with ephemeral we declare 'here' as 'dead' right away, whenever
        # we symlink origin's annex, since availability from 'here' should
        # not be propagated for an ephemeral clone when we publish back to
        # origin.
        # This will cause stuff like this for a locally present annexed file:
        # % git annex whereis d1
        # whereis d1 (0 copies) failed
        # BUT this works:
        # % git annex find . --not --in here
        # % git annex find . --in here
        # d1

        # we don't want annex copy-to origin
        ds.config.set(
            'remote.origin.annex-ignore', 'true',
            where='local')

        ds.repo.set_remote_dead('here')

        if check_symlink_capability(ds.repo.dot_git / 'dl_link_test',
                                    ds.repo.dot_git / 'dl_target_test'):
            # symlink the annex to avoid needless copies in an ephemeral clone
            annex_dir = ds.repo.dot_git / 'annex'
            origin_annex_url = ds.config.get("remote.origin.url", None)
            origin_git_path = None
            if origin_annex_url:
                try:
                    # Deal with file:// scheme URLs as well as plain paths.
                    # If origin isn't local, we have nothing to do.
                    origin_git_path = Path(RI(origin_annex_url).localpath)

                    # we are local; check for a bare repo first to not mess w/
                    # the path
                    if GitRepo(origin_git_path, create=False).bare:
                        # origin is a bare repo -> use path as is
                        pass
                    elif origin_git_path.name != '.git':
                        origin_git_path /= '.git'
                except ValueError:
                    # Note, that accessing localpath on a non-local RI throws
                    # ValueError rather than resulting in an AttributeError.
                    # TODO: Warning level okay or is info level sufficient?
                    # Note, that setting annex-dead is independent of
                    # symlinking .git/annex. It might still make sense to
                    # have an ephemeral clone that doesn't propagate its avail.
                    # info. Therefore don't fail altogether.
                    lgr.warning("reckless=ephemeral mode: origin doesn't seem "
                                "local: %s\nno symlinks being used",
                                origin_annex_url)
            if origin_git_path:
                # Replace whatever is at .git/annex by a symlink to origin's
                # annex: an existing symlink is unlinked, anything else is
                # removed as a directory tree.
                # TODO make sure that we do not delete any unique data
                if annex_dir.is_symlink():
                    annex_dir.unlink()
                else:
                    rmtree(str(annex_dir))
                annex_dir.symlink_to(origin_git_path / 'annex',
                                     target_is_directory=True)
        else:
            # TODO: What level? + note, that annex-dead is independent of
            # symlinking .git/annex (see the comment in the branch above)
            lgr.warning("reckless=ephemeral mode: Unable to create symlinks on "
                        "this file system.")

    srs = {True: [], False: []}  # special remotes by "autoenable" key
    remote_uuids = None  # might be necessary to discover known UUIDs

    repo_config = repo.config
    # Note: The purpose of this function is to inform the user. So if something
    # looks misconfigured, we'll warn and move on to the next item.
    for uuid, config in repo.get_special_remotes().items():
        sr_name = config.get('name', None)
        if sr_name is None:
            lgr.warning(
                'Ignoring special remote %s because it does not have a name. '
                'Known information: %s',
                uuid, config)
            continue
        sr_autoenable = config.get('autoenable', False)
        try:
            sr_autoenable = ensure_bool(sr_autoenable)
        except ValueError:
            # the trailing space after "bool." keeps the two sentences of
            # the message from running into each other
            lgr.warning(
                'Failed to process "autoenable" value %r for sibling %s in '
                'dataset %s as bool. '
                'You might need to enable it later manually and/or fix it up to'
                ' avoid this message in the future.',
                sr_autoenable, sr_name, ds.path)
            continue

        # If it looks like a type=git special remote, make sure we have up to
        # date information. See gh-2897.
        if sr_autoenable and repo_config.get("remote.{}.fetch".format(sr_name)):
            try:
                repo.fetch(remote=sr_name)
            except CommandError as exc:
                lgr.warning("Failed to fetch type=git special remote %s: %s",
                            sr_name, exc_str(exc))

        # determine whether there is a registered remote with matching UUID
        if uuid:
            if remote_uuids is None:
                # computed lazily, at most once per call
                remote_uuids = {
                    # Check annex-config-uuid first. For sameas annex remotes,
                    # this will point to the UUID for the configuration (i.e.
                    # the key returned by get_special_remotes) rather than the
                    # shared UUID.
                    (repo_config.get('remote.%s.annex-config-uuid' % r) or
                     repo_config.get('remote.%s.annex-uuid' % r))
                    for r in repo.get_remotes()
                }
            if uuid not in remote_uuids:
                srs[sr_autoenable].append(sr_name)

    if srs[True]:
        lgr.debug(
            "configuration for %s %s added because of autoenable,"
            " but no UUIDs for them yet known for dataset %s",
            # since we are only at debug level, we could call things their
            # proper names
            single_or_plural("special remote",
                             "special remotes", len(srs[True]), True),
            ", ".join(srs[True]),
            ds.path
        )

    if srs[False]:
        # if has no auto-enable special remotes
        lgr.info(
            'access to %s %s not auto-enabled, enable with:\n'
            '\t\tdatalad siblings -d "%s" enable -s %s',
            # but since humans might read it, we better confuse them with our
            # own terms!
            single_or_plural("dataset sibling",
                             "dataset siblings", len(srs[False]), True),
            ", ".join(srs[False]),
            ds.path,
            srs[False][0] if len(srs[False]) == 1 else "SIBLING",
        )

    # we have just cloned the repo, so it has 'origin', configure any
    # reachable origin of origins
    yield from configure_origins(ds, ds)
コード例 #13
0
ファイル: update.py プロジェクト: silky/datalad
    def __call__(name=None,
                 dataset=None,
                 merge=False,
                 recursive=False,
                 fetch_all=False,
                 reobtain_data=False):
        """Fetch (and optionally merge) updates for a dataset's repositories.

        The dataset is resolved via `require_dataset`. With `recursive`,
        the repositories of its fulfilled subdatasets are processed too.
        Repositories without remotes that have references, or that do not
        know the remote `name`, are skipped.

        Raises `NotImplementedError` if `reobtain_data` is set, or if
        `merge` is requested without `name` for a repository with more
        than one remote.
        """
        # TODO: Is there an 'update filehandle' similar to install and publish?
        # What does it mean?

        if reobtain_data:
            # TODO: properly define, what to do
            raise NotImplementedError("TODO: Option '--reobtain-data' not "
                                      "implemented yet.")

        ds = require_dataset(dataset, check_installed=True, purpose='updating')
        assert ds.repo is not None

        repos_to_update = [ds.repo]
        if recursive:
            # only work on those which are installed
            repos_to_update.extend(
                GitRepo(opj(ds.path, sub_path), create=False)
                for sub_path in ds.get_subdatasets(recursive=True,
                                                   fulfilled=True))

        # TODO: current implementation disregards submodules organization,
        #  it just updates/merge each one individually whenever in the simplest
        #  case we just need  a call to
        # git submodule update --recursive
        #  if name was not provided, and there is no --merge
        # If we do --merge we should at the end call save
        for repo in repos_to_update:
            # remotes which have references (would exclude special remotes)
            remotes = repo.get_remotes(with_refs_only=True)
            if not remotes:
                lgr.debug("No siblings known to dataset at %s\nSkipping",
                          repo.path)
                continue
            if name and name not in remotes:
                lgr.warning("'%s' not known to dataset %s\nSkipping", name,
                            repo.path)
                continue

            # Currently '--merge' works for single remote only:
            # TODO: - condition still incomplete
            #       - We can merge if a remote was given or there is a
            #         tracking branch
            #       - we also can fetch all remotes independently on whether or
            #         not we merge a certain remote
            if merge and not name and len(remotes) > 1:
                lgr.debug("Found multiple remotes:\n%s" % remotes)
                raise NotImplementedError("No merge strategy for multiple "
                                          "remotes implemented yet.")
            lgr.info("Updating dataset '%s' ..." % repo.path)

            repo.fetch(remote=name, all_=fetch_all)

            # TODO: This isn't correct. `fetch_all` fetches all remotes.
            # Apparently, we currently fetch an entire remote anyway. Is this
            # what we want? Do we want to specify a refspec instead?
            # yoh: we should leave it to git and its configuration.
            # So imho we should just extract to fetch everything git would fetch
            if knows_annex(repo.path) and not fetch_all:
                # the git-annex branch should come from the remote we update
                # from; without one, use the tracking branch's remote
                annex_source = name
                if not annex_source:
                    annex_source, _ = repo.get_tracking_branch()
                if annex_source:
                    # TODO: what does failing here look like?
                    repo.fetch(remote=annex_source)

            if not merge:
                continue

            lgr.info("Applying changes from tracking branch...")
            # TODO: Adapt.
            # TODO: Rethink default remote/tracking branch. See above.
            # We need a "tracking remote" but custom refspec to fetch from
            # that remote
            pull_cmd = ["git", "pull"]
            if name:
                # branch needed, if not default remote
                # => TODO: use default remote/tracking branch to compare
                # => TODO: allow for passing a branch
                # (or more general refspec?)
                # For now, just use the same name
                pull_cmd += [name, repo.get_active_branch()]

            pull_out, _ = repo._git_custom_command('', pull_cmd)
            lgr.info(pull_out)
            if knows_annex(repo.path):
                lgr.info("Updating annex ...")
                annex_out, _ = repo._git_custom_command(
                    '', ["git", "annex", "merge"])
                lgr.info(annex_out)
コード例 #14
0
ファイル: update.py プロジェクト: glalteva/datalad
    def __call__(name=None, dataset=None,
                 merge=False, recursive=False, fetch_all=False,
                 reobtain_data=False):
        """Fetch (and optionally merge) updates for a dataset's repositories.

        `dataset` may be a `Dataset`, something `Dataset(...)` accepts, or
        None; in the latter case the dataset at or above the current
        working directory is used. With `recursive`, the repositories of
        the dataset's handles are processed as well. A repository that
        does not know the remote `name` is skipped with a warning.

        Raises `NotImplementedError` if `reobtain_data` is set, or if
        `merge` is requested without `name` for a repository with more
        than one remote. Raises `ValueError` if no dataset can be found or
        the dataset is not installed.
        """
        # TODO: Is there an 'update filehandle' similar to install and publish?
        # What does it mean?

        if reobtain_data:
            # TODO: properly define, what to do
            raise NotImplementedError("TODO: Option '--reobtain-data' not "
                                      "implemented yet.")

        # shortcut
        ds = dataset

        # accept anything `Dataset` can be constructed from
        if ds is not None and not isinstance(ds, Dataset):
            ds = Dataset(ds)

        # if we have no dataset given, figure out which one we need to operate
        # on, based on the current working directory of the process:
        if ds is None:
            # try to find a dataset at or above PWD:
            dspath = GitRepo.get_toppath(getpwd())
            if dspath is None:
                raise ValueError("No dataset found at %s." % getpwd())
            ds = Dataset(dspath)
        assert(ds is not None)

        if not ds.is_installed():
            raise ValueError("No installed dataset found at "
                             "{0}.".format(ds.path))
        assert(ds.repo is not None)

        # the dataset's own repository, plus all handles when recursing
        repos_to_update = [ds.repo]
        if recursive:
            repos_to_update += [GitRepo(opj(ds.path, sub_path))
                                for sub_path in
                                ds.get_dataset_handles(recursive=True)]

        for repo in repos_to_update:
            # get all remotes:
            remotes = repo.git_get_remotes()
            if name and name not in remotes:
                lgr.warning("'%s' not known to dataset %s.\nSkipping" %
                            (name, repo.path))
                continue

            # Currently '--merge' works for single remote only:
            # TODO: - condition still incomplete
            #       - We can merge if a remote was given or there is a
            #         tracking branch
            #       - we also can fetch all remotes independently on whether or
            #         not we merge a certain remote
            if not name and len(remotes) > 1 and merge:
                lgr.debug("Found multiple remotes:\n%s" % remotes)
                raise NotImplementedError("No merge strategy for multiple "
                                          "remotes implemented yet.")
            lgr.info("Updating handle '%s' ..." % repo.path)

            # fetch remote(s):
            # NOTE(review): empty strings are passed when no name / no --all
            # is requested; presumably `git_fetch` ignores them -- confirm.
            repo.git_fetch(name if name else '',
                           "--all" if fetch_all else '')

            # if it is an annex and there is a tracking branch, and we didn't
            # fetch the entire remote anyway, explicitly fetch git-annex
            # branch:
            # TODO: Is this logic correct? Shouldn't we fetch git-annex from
            # `name` if there is any (or if there is no tracking branch but we
            # have a `name`?
            if knows_annex(repo.path) and not fetch_all:
                # check for tracking branch's remote:
                try:
                    std_out, std_err = \
                        repo._git_custom_command('',
                        ["git", "config", "--get",
                         "branch.{active_branch}.remote".format(
                             active_branch=repo.git_get_active_branch())])
                except CommandError as e:
                    # exit code 1 with empty output is taken to mean that no
                    # remote is configured for the branch; anything else is
                    # re-raised
                    if e.code == 1 and e.stdout == "":
                        std_out = None
                    else:
                        raise
                if std_out:  # we have a "tracking remote"
                    # NOTE(review): remote and branch are passed as a single
                    # space-separated string -- verify `git_fetch` handles it.
                    repo.git_fetch("%s git-annex" % std_out.strip())

            # merge:
            if merge:
                lgr.info("Applying changes from tracking branch...")
                cmd_list = ["git", "pull"]
                if name:
                    cmd_list.append(name)
                    # branch needed, if not default remote
                    # => TODO: use default remote/tracking branch to compare
                    #          (see above, where git-annex is fetched)
                    # => TODO: allow for passing a branch
                    # (or more general refspec?)
                    # For now, just use the same name
                    cmd_list.append(repo.git_get_active_branch())

                out, err = repo._git_custom_command('', cmd_list)
                lgr.info(out)
                if knows_annex(repo.path):
                    # annex-apply:
                    lgr.info("Updating annex ...")
                    out, err = repo._git_custom_command('', ["git", "annex", "merge"])
                    lgr.info(out)