Exemple #1
0
def test_GitRepo_ssh_pull(remote_path, repo_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify remote:
    remote_repo.checkout("ssh-test", ['-b'])
    with open(op.join(remote_repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    remote_repo.add("ssh_testfile.dat")
    remote_repo.commit("ssh_testfile.dat added.")

    # file is not locally known yet:
    assert_not_in("ssh_testfile.dat", repo.get_indexed_files())

    # pull changes:
    repo.pull(remote="ssh-remote", refspec=remote_repo.get_active_branch())
    ok_clean_git(repo.path, annex=False)

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # we actually pulled the changes
    assert_in("ssh_testfile.dat", repo.get_indexed_files())
Exemple #2
0
def test_GitRepo_get_merge_base(src):
    repo = GitRepo(src, create=True)
    with open(op.join(src, 'file.txt'), 'w') as f:
        f.write('load')
    repo.add('*')
    repo.commit('committing')

    assert_raises(ValueError, repo.get_merge_base, [])
    branch1 = repo.get_active_branch()
    branch1_hexsha = repo.get_hexsha()
    eq_(len(branch1_hexsha), 40)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # Let's create a detached branch
    branch2 = "_detach_"
    repo.checkout(branch2, options=["--orphan"])
    # it will have all the files
    # Must not do:  https://github.com/gitpython-developers/GitPython/issues/375
    # repo.git_add('.')
    repo.add('*')
    # NOTE: fun part is that we should have at least a different commit message
    # so it results in a different checksum ;)
    repo.commit("committing again")
    assert (repo.get_indexed_files())  # we did commit
    assert (repo.get_merge_base(branch1) is None)
    assert (repo.get_merge_base([branch2, branch1]) is None)

    # Let's merge them up -- then merge base should match the master
    repo.merge(branch1, allow_unrelated=True)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # if points to some empty/non-existing branch - should also be None
    assert (repo.get_merge_base(['nonexistent', branch2]) is None)
Exemple #3
0
def test_GitRepo_get_merge_base(src):
    repo = GitRepo(src, create=True)
    with open(op.join(src, 'file.txt'), 'w') as f:
        f.write('load')
    repo.add('*')
    repo.commit('committing')

    assert_raises(ValueError, repo.get_merge_base, [])
    branch1 = repo.get_active_branch()
    branch1_hexsha = repo.get_hexsha()
    eq_(len(branch1_hexsha), 40)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # Let's create a detached branch
    branch2 = "_detach_"
    repo.checkout(branch2, options=["--orphan"])
    # it will have all the files
    # Must not do:  https://github.com/gitpython-developers/GitPython/issues/375
    # repo.git_add('.')
    repo.add('*')
    # NOTE: fun part is that we should have at least a different commit message
    # so it results in a different checksum ;)
    repo.commit("committing again")
    assert(repo.get_indexed_files())  # we did commit
    assert(repo.get_merge_base(branch1) is None)
    assert(repo.get_merge_base([branch2, branch1]) is None)

    # Let's merge them up -- then merge base should match the master
    repo.merge(branch1, allow_unrelated=True)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # if points to some empty/non-existing branch - should also be None
    assert(repo.get_merge_base(['nonexistent', branch2]) is None)
Exemple #4
0
def test_fake_dates(path):
    gr = GitRepo(path, create=True, fake_dates=True)

    gr.add("foo")
    gr.commit("commit foo")

    seconds_initial = gr.config.obtain("datalad.fake-dates-start")

    # First commit is incremented by 1 second.
    eq_(seconds_initial + 1, gr.get_commit_date())

    # The second commit by 2.
    gr.add("bar")
    gr.commit("commit bar")
    eq_(seconds_initial + 2, gr.get_commit_date())

    # If we checkout another branch, its time is still based on the latest
    # timestamp in any local branch.
    gr.checkout("other", options=["--orphan"])
    with open(op.join(path, "baz"), "w") as ofh:
        ofh.write("baz content")
    gr.add("baz")
    gr.commit("commit baz")
    eq_(gr.get_active_branch(), "other")
    eq_(seconds_initial + 3, gr.get_commit_date())
Exemple #5
0
def test_GitRepo_ssh_pull(remote_path, repo_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir, get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify remote:
    remote_repo.checkout("ssh-test", ['-b'])
    with open(op.join(remote_repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    remote_repo.add("ssh_testfile.dat")
    remote_repo.commit("ssh_testfile.dat added.")

    # file is not locally known yet:
    assert_not_in("ssh_testfile.dat", repo.get_indexed_files())

    # pull changes:
    repo.pull(remote="ssh-remote", refspec=remote_repo.get_active_branch())
    ok_clean_git(repo.path, annex=False)

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # we actually pulled the changes
    assert_in("ssh_testfile.dat", repo.get_indexed_files())
Exemple #6
0
def test_fake_dates(path):
    gr = GitRepo(path, create=True, fake_dates=True)

    gr.add("foo")
    gr.commit("commit foo")

    seconds_initial = gr.config.obtain("datalad.fake-dates-start")

    # First commit is incremented by 1 second.
    eq_(seconds_initial + 1, gr.get_commit_date())

    # The second commit by 2.
    gr.add("bar")
    gr.commit("commit bar")
    eq_(seconds_initial + 2, gr.get_commit_date())

    # If we checkout another branch, its time is still based on the latest
    # timestamp in any local branch.
    gr.checkout("other", options=["--orphan"])
    with open(op.join(path, "baz"), "w") as ofh:
        ofh.write("baz content")
    gr.add("baz")
    gr.commit("commit baz")
    eq_(gr.get_active_branch(), "other")
    eq_(seconds_initial + 3, gr.get_commit_date())
Exemple #7
0
def test_get_commit_date(path):
    gr = GitRepo(path, create=True)
    eq_(gr.get_commit_date(), None)

    # Let's make a commit with a custom date
    DATE = "Wed Mar 14 03:47:30 2018 -0000"
    DATE_EPOCH = 1520999250
    gr.add('1')
    gr.commit("committed", date=DATE)
    gr = GitRepo(path, create=True)
    date = gr.get_commit_date()
    neq_(date, None)
    eq_(date, DATE_EPOCH)

    eq_(date, gr.get_commit_date('master'))
    # and even if we get into a detached head
    gr.checkout(gr.get_hexsha())
    eq_(gr.get_active_branch(), None)
    eq_(date, gr.get_commit_date('master'))
Exemple #8
0
def test_get_commit_date(path):
    gr = GitRepo(path, create=True)
    eq_(gr.get_commit_date(), None)

    # Let's make a commit with a custom date
    DATE = "Wed Mar 14 03:47:30 2018 -0000"
    DATE_EPOCH = 1520999250
    gr.add('1')
    gr.commit("committed", date=DATE)
    gr = GitRepo(path, create=True)
    date = gr.get_commit_date()
    neq_(date, None)
    eq_(date, DATE_EPOCH)

    eq_(date, gr.get_commit_date('master'))
    # and even if we get into a detached head
    gr.checkout(gr.get_hexsha())
    eq_(gr.get_active_branch(), None)
    eq_(date, gr.get_commit_date('master'))
Exemple #9
0
def _install_subds_from_flexible_source(ds, sm, **kwargs):
    """Tries to obtain a given subdataset from several meaningful locations

    Parameters
    ----------
    ds : Dataset
      Parent dataset of to-be-installed subdataset.
    sm : dict
      Submodule record as produced by `subdatasets()`.
    **kwargs
      Passed onto clone()
    """
    sm_path = op.relpath(sm['path'], start=sm['parentds'])
    # compose a list of candidate clone URLs
    clone_urls = _get_flexible_source_candidates_for_submodule(ds, sm)

    # prevent inevitable exception from `clone`
    dest_path = op.join(ds.path, sm_path)
    clone_urls_ = [src['url'] for src in clone_urls if src['url'] != dest_path]

    if not clone_urls:
        # yield error
        yield get_status_dict(
            action='install',
            ds=ds,
            status='error',
            message=("Have got no candidates to install subdataset %s from.",
                     sm_path),
            logger=lgr,
        )
        return

    for res in clone_dataset(clone_urls_,
                             Dataset(dest_path),
                             cfg=ds.config,
                             **kwargs):
        # make sure to fix a detached HEAD before yielding the install success
        # result. The resetting of the branch would undo any change done
        # to the repo by processing in response to the result
        if res.get('action', None) == 'install' and \
                res.get('status', None) == 'ok' and \
                res.get('type', None) == 'dataset' and \
                res.get('path', None) == dest_path:
            _fixup_submodule_dotgit_setup(ds, sm_path)

            target_commit = sm['gitshasum']
            lgr.debug(
                "Update cloned subdataset {0} in parent".format(dest_path))
            section_name = 'submodule.{}'.format(sm['gitmodule_name'])
            # do not use `git-submodule update --init`, it would make calls
            # to git-config which will not obey datalad inter-process locks for
            # modifying .git/config
            sub = GitRepo(res['path'])
            # record what branch we were on right after the clone
            # TODO instead of the active branch, this should first consider
            # a configured branch in the submodule record of the superdataset
            sub_orig_branch = sub.get_active_branch()
            # if we are on a branch this hexsha will be the tip of that branch
            sub_orig_hexsha = sub.get_hexsha()
            if sub_orig_hexsha != target_commit:
                # make sure we have the desired commit locally
                # expensive and possibly error-prone fetch conditional on cheap
                # local check
                if not sub.commit_exists(target_commit):
                    try:
                        sub.fetch(remote='origin', refspec=target_commit)
                    except CommandError:
                        pass
                    # instead of inspecting the fetch results for possible ways
                    # with which it could failed to produced the desired result
                    # let's verify the presence of the commit directly, we are in
                    # expensive-land already anyways
                    if not sub.commit_exists(target_commit):
                        res.update(
                            status='error',
                            message=
                            ('Target commit %s does not exist in the clone, and '
                             'a fetch that commit from origin failed',
                             target_commit[:8]),
                        )
                        yield res
                        # there is nothing we can do about this
                        # MIH thinks that removing the clone is not needed, as a likely
                        # next step will have to be a manual recovery intervention
                        # and not another blind attempt
                        continue
                # checkout the desired commit
                sub.call_git(['checkout', target_commit])
                # did we detach?
                # XXX: This is a less generic variant of a part of
                # GitRepo.update_submodule(). It makes use of already available
                # information and trusts the existence of the just cloned repo
                # and avoids (redoing) some safety checks
                if sub_orig_branch and not sub.get_active_branch():
                    # trace if current state is a predecessor of the branch_hexsha
                    lgr.debug(
                        "Detached HEAD after updating submodule %s "
                        "(original branch: %s)", sub, sub_orig_branch)
                    if sub.get_merge_base([sub_orig_hexsha,
                                           target_commit]) == target_commit:
                        # TODO: config option?
                        # MIH: There is no real need here. IMHO this should all not
                        # happen, unless the submodule record has a branch
                        # configured. And Datalad should leave such a record, when
                        # a submodule is registered.

                        # we assume the target_commit to be from the same branch,
                        # because it is an ancestor -- update that original branch
                        # to point to the target_commit, and update HEAD to point to
                        # that location -- this readies the subdataset for
                        # further modification
                        lgr.info(
                            "Reset subdataset branch '%s' to %s (from %s) to "
                            "avoid a detached HEAD", sub_orig_branch,
                            target_commit[:8], sub_orig_hexsha[:8])
                        branch_ref = 'refs/heads/%s' % sub_orig_branch
                        sub.update_ref(branch_ref, target_commit)
                        sub.update_ref('HEAD', branch_ref, symbolic=True)
                    else:
                        lgr.warning(
                            "%s has a detached HEAD, because the recorded "
                            "subdataset state %s has no unique ancestor with "
                            "branch '%s'", sub, target_commit[:8],
                            sub_orig_branch)

            # register the submodule as "active" in the superdataset
            ds.config.set(
                '{}.active'.format(section_name),
                'true',
                reload=False,
                force=True,
                where='local',
            )
            ds.config.set(
                '{}.url'.format(section_name),
                # record the actual source URL of the successful clone
                # and not a funky prediction based on the parent ds
                # like ds.repo.update_submodule() would do (does not
                # accept a URL)
                res['source']['giturl'],
                reload=True,
                force=True,
                where='local',
            )
        yield res

    subds = Dataset(dest_path)
    if not subds.is_installed():
        lgr.debug('Desired subdataset %s did not materialize, stopping', subds)
        return

    # check whether clone URL generators were involved
    cand_cfg = [rec for rec in clone_urls if rec.get('from_config', False)]
    if cand_cfg:
        # get a handle on the configuration that is specified in the
        # dataset itself (local and dataset)
        super_cfg = ConfigManager(dataset=ds, source='dataset-local')
        need_reload = False
        for rec in cand_cfg:
            # check whether any of this configuration originated from the
            # superdataset. if so, inherit the config in the new subdataset
            # clone. if not, keep things clean in order to be able to move with
            # any outside configuration change
            for c in ('datalad.get.subdataset-source-candidate-{}{}'.format(
                    rec['cost'], rec['name']),
                      'datalad.get.subdataset-source-candidate-{}'.format(
                          rec['name'])):
                if c in super_cfg.keys():
                    subds.config.set(c,
                                     super_cfg.get(c),
                                     where='local',
                                     reload=False)
                    need_reload = True
                    break
        if need_reload:
            subds.config.reload(force=True)