def test_GitRepo_ssh_pull(remote_path, repo_path):
    """Pulling over ssh:// must work and must go through the SSH manager."""
    from datalad import ssh_manager

    origin = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    local = GitRepo(repo_path, create=True)
    local.add_remote("ssh-remote", url)

    # create content on the remote side that the local clone cannot
    # know about yet
    origin.checkout("ssh-test", ['-b'])
    with open(op.join(origin.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    origin.add("ssh_testfile.dat")
    origin.commit("ssh_testfile.dat added.")

    # the new file must not be in the local index yet:
    assert_not_in("ssh_testfile.dat", local.get_indexed_files())

    # pull the remote branch via the ssh URL:
    local.pull(remote="ssh-remote", refspec=origin.get_active_branch())
    ok_clean_git(local.path, annex=False)

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and its control socket was created on disk:
    ok_(op.exists(socket_path))
    # finally, the remote change actually arrived locally:
    assert_in("ssh_testfile.dat", local.get_indexed_files())
def test_GitRepo_get_merge_base(src):
    """get_merge_base(): identity, unrelated histories, and merges."""
    repo = GitRepo(src, create=True)
    with open(op.join(src, 'file.txt'), 'w') as f:
        f.write('load')
    repo.add('*')
    repo.commit('committing')

    # an empty commitish list is rejected
    assert_raises(ValueError, repo.get_merge_base, [])

    first_branch = repo.get_active_branch()
    first_hexsha = repo.get_hexsha()
    eq_(len(first_hexsha), 40)
    # the merge base of a branch with itself is its own tip
    eq_(repo.get_merge_base(first_branch), first_hexsha)

    # create an orphan branch sharing no history with the first one
    orphan = "_detach_"
    repo.checkout(orphan, options=["--orphan"])
    # it will have all the files
    # Must not do: https://github.com/gitpython-developers/GitPython/issues/375
    # repo.git_add('.')
    repo.add('*')
    # NOTE: fun part is that we should have at least a different commit
    # message so it results in a different checksum ;)
    repo.commit("committing again")
    assert repo.get_indexed_files()  # we did commit
    # unrelated histories have no merge base
    assert repo.get_merge_base(first_branch) is None
    assert repo.get_merge_base([orphan, first_branch]) is None

    # after merging them up, the merge base matches the first branch's tip
    repo.merge(first_branch, allow_unrelated=True)
    eq_(repo.get_merge_base(first_branch), first_hexsha)

    # if it points to some empty/non-existing branch - should also be None
    assert repo.get_merge_base(['nonexistent', orphan]) is None
# NOTE(review): this is a byte-near duplicate of the earlier
# test_GitRepo_get_merge_base definition in this file; being defined later,
# it shadows that one at import time. Consider removing one copy.
def test_GitRepo_get_merge_base(src):
    """get_merge_base(): identity, unrelated histories, and merges."""
    repo = GitRepo(src, create=True)
    with open(op.join(src, 'file.txt'), 'w') as f:
        f.write('load')
    repo.add('*')
    repo.commit('committing')

    # an empty commitish list must raise
    assert_raises(ValueError, repo.get_merge_base, [])

    branch1 = repo.get_active_branch()
    branch1_hexsha = repo.get_hexsha()
    eq_(len(branch1_hexsha), 40)
    # a branch's merge base with itself is its tip
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # Let's create a detached branch
    branch2 = "_detach_"
    repo.checkout(branch2, options=["--orphan"])
    # it will have all the files
    # Must not do: https://github.com/gitpython-developers/GitPython/issues/375
    # repo.git_add('.')
    repo.add('*')
    # NOTE: fun part is that we should have at least a different commit
    # message so it results in a different checksum ;)
    repo.commit("committing again")
    assert repo.get_indexed_files()  # we did commit
    # histories with no common ancestor yield None
    assert repo.get_merge_base(branch1) is None
    assert repo.get_merge_base([branch2, branch1]) is None

    # Let's merge them up -- then merge base should match the master
    repo.merge(branch1, allow_unrelated=True)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # if points to some empty/non-existing branch - should also be None
    assert repo.get_merge_base(['nonexistent', branch2]) is None
def test_fake_dates(path):
    """With fake_dates=True commit timestamps increment from a start value."""
    repo = GitRepo(path, create=True, fake_dates=True)
    repo.add("foo")
    repo.commit("commit foo")

    start = repo.config.obtain("datalad.fake-dates-start")

    # the first commit is stamped one second past the configured start
    eq_(start + 1, repo.get_commit_date())

    # the second commit two seconds past it
    repo.add("bar")
    repo.commit("commit bar")
    eq_(start + 2, repo.get_commit_date())

    # checking out another branch keeps counting from the latest
    # timestamp found in any local branch
    repo.checkout("other", options=["--orphan"])
    with open(op.join(path, "baz"), "w") as ofh:
        ofh.write("baz content")
    repo.add("baz")
    repo.commit("commit baz")
    eq_(repo.get_active_branch(), "other")
    eq_(start + 3, repo.get_commit_date())
# NOTE(review): duplicate of the test_GitRepo_ssh_pull definition earlier in
# this file; being defined later, it shadows that one. Consider de-duplicating.
def test_GitRepo_ssh_pull(remote_path, repo_path):
    """Pulling over ssh:// must work and must go through the SSH manager."""
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify remote:
    remote_repo.checkout("ssh-test", ['-b'])
    with open(op.join(remote_repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    remote_repo.add("ssh_testfile.dat")
    remote_repo.commit("ssh_testfile.dat added.")

    # file is not locally known yet:
    assert_not_in("ssh_testfile.dat", repo.get_indexed_files())

    # pull changes:
    repo.pull(remote="ssh-remote", refspec=remote_repo.get_active_branch())
    ok_clean_git(repo.path, annex=False)

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))
    # we actually pulled the changes
    assert_in("ssh_testfile.dat", repo.get_indexed_files())
# NOTE(review): duplicate of the test_fake_dates definition earlier in this
# file; being defined later, it shadows that one. Consider de-duplicating.
def test_fake_dates(path):
    """With fake_dates=True commit timestamps increment from a start value."""
    gr = GitRepo(path, create=True, fake_dates=True)
    gr.add("foo")
    gr.commit("commit foo")

    seconds_initial = gr.config.obtain("datalad.fake-dates-start")

    # First commit is incremented by 1 second.
    eq_(seconds_initial + 1, gr.get_commit_date())

    # The second commit by 2.
    gr.add("bar")
    gr.commit("commit bar")
    eq_(seconds_initial + 2, gr.get_commit_date())

    # If we checkout another branch, its time is still based on the latest
    # timestamp in any local branch.
    gr.checkout("other", options=["--orphan"])
    with open(op.join(path, "baz"), "w") as ofh:
        ofh.write("baz content")
    gr.add("baz")
    gr.commit("commit baz")
    eq_(gr.get_active_branch(), "other")
    eq_(seconds_initial + 3, gr.get_commit_date())
def test_get_commit_date(path):
    """get_commit_date(): empty repo, explicit date, detached HEAD."""
    repo = GitRepo(path, create=True)
    # no commits yet -> no date
    eq_(repo.get_commit_date(), None)

    # Let's make a commit with a custom date
    custom_date = "Wed Mar 14 03:47:30 2018 -0000"
    custom_epoch = 1520999250
    repo.add('1')
    repo.commit("committed", date=custom_date)

    # re-open the repository and read the date back
    repo = GitRepo(path, create=True)
    commit_date = repo.get_commit_date()
    neq_(commit_date, None)
    eq_(commit_date, custom_epoch)
    eq_(commit_date, repo.get_commit_date('master'))

    # and even if we get into a detached head
    repo.checkout(repo.get_hexsha())
    eq_(repo.get_active_branch(), None)
    eq_(commit_date, repo.get_commit_date('master'))
# NOTE(review): duplicate of the test_get_commit_date definition earlier in
# this file; being defined later, it shadows that one. Consider de-duplicating.
def test_get_commit_date(path):
    """get_commit_date(): empty repo, explicit date, detached HEAD."""
    gr = GitRepo(path, create=True)
    eq_(gr.get_commit_date(), None)

    # Let's make a commit with a custom date
    DATE = "Wed Mar 14 03:47:30 2018 -0000"
    DATE_EPOCH = 1520999250
    gr.add('1')
    gr.commit("committed", date=DATE)

    # a fresh GitRepo instance on the same path sees the committed date
    gr = GitRepo(path, create=True)
    date = gr.get_commit_date()
    neq_(date, None)
    eq_(date, DATE_EPOCH)
    eq_(date, gr.get_commit_date('master'))

    # and even if we get into a detached head
    gr.checkout(gr.get_hexsha())
    eq_(gr.get_active_branch(), None)
    eq_(date, gr.get_commit_date('master'))
def _install_subds_from_flexible_source(ds, sm, **kwargs):
    """Tries to obtain a given subdataset from several meaningful locations

    Parameters
    ----------
    ds : Dataset
      Parent dataset of to-be-installed subdataset.
    sm : dict
      Submodule record as produced by `subdatasets()`.
    **kwargs
      Passed onto clone()
    """
    sm_path = op.relpath(sm['path'], start=sm['parentds'])
    # compose a list of candidate clone URLs
    clone_urls = _get_flexible_source_candidates_for_submodule(ds, sm)

    # prevent inevitable exception from `clone`
    dest_path = op.join(ds.path, sm_path)
    clone_urls_ = [
        src['url'] for src in clone_urls if src['url'] != dest_path]

    if not clone_urls:
        # yield error
        yield get_status_dict(
            action='install',
            ds=ds,
            status='error',
            message=("Have got no candidates to install subdataset %s from.",
                     sm_path),
            logger=lgr,
        )
        return

    for res in clone_dataset(
            clone_urls_,
            Dataset(dest_path),
            cfg=ds.config,
            **kwargs):
        # make sure to fix a detached HEAD before yielding the install success
        # result. The resetting of the branch would undo any change done
        # to the repo by processing in response to the result
        if res.get('action', None) == 'install' \
                and res.get('status', None) == 'ok' \
                and res.get('type', None) == 'dataset' \
                and res.get('path', None) == dest_path:
            _fixup_submodule_dotgit_setup(ds, sm_path)

            target_commit = sm['gitshasum']
            lgr.debug(
                "Update cloned subdataset {0} in parent".format(dest_path))
            section_name = 'submodule.{}'.format(sm['gitmodule_name'])
            # do not use `git-submodule update --init`, it would make calls
            # to git-config which will not obey datalad inter-process locks
            # for modifying .git/config
            sub = GitRepo(res['path'])
            # record what branch we were on right after the clone
            # TODO instead of the active branch, this should first consider
            # a configured branch in the submodule record of the superdataset
            sub_orig_branch = sub.get_active_branch()
            # if we are on a branch this hexsha will be the tip of that branch
            sub_orig_hexsha = sub.get_hexsha()
            if sub_orig_hexsha != target_commit:
                # make sure we have the desired commit locally
                # expensive and possibly error-prone fetch conditional on
                # cheap local check
                if not sub.commit_exists(target_commit):
                    try:
                        sub.fetch(remote='origin', refspec=target_commit)
                    except CommandError:
                        pass
                    # instead of inspecting the fetch results for possible
                    # ways with which it could failed to produced the desired
                    # result let's verify the presence of the commit directly,
                    # we are in expensive-land already anyways
                    if not sub.commit_exists(target_commit):
                        res.update(
                            status='error',
                            message=('Target commit %s does not exist in the '
                                     'clone, and a fetch that commit from '
                                     'origin failed',
                                     target_commit[:8]),
                        )
                        yield res
                        # there is nothing we can do about this
                        # MIH thinks that removing the clone is not needed, as
                        # a likely next step will have to be a manual recovery
                        # intervention and not another blind attempt
                        continue
                # checkout the desired commit
                sub.call_git(['checkout', target_commit])
                # did we detach?
                # XXX: This is a less generic variant of a part of
                # GitRepo.update_submodule(). It makes use of already
                # available information and trusts the existence of the just
                # cloned repo and avoids (redoing) some safety checks
                if sub_orig_branch and not sub.get_active_branch():
                    # trace if current state is a predecessor of the
                    # branch_hexsha
                    lgr.debug(
                        "Detached HEAD after updating submodule %s "
                        "(original branch: %s)", sub, sub_orig_branch)
                    if sub.get_merge_base(
                            [sub_orig_hexsha,
                             target_commit]) == target_commit:
                        # TODO: config option?
                        # MIH: There is no real need here. IMHO this should
                        # all not happen, unless the submodule record has a
                        # branch configured. And Datalad should leave such a
                        # record, when a submodule is registered.

                        # we assume the target_commit to be from the same
                        # branch, because it is an ancestor -- update that
                        # original branch to point to the target_commit, and
                        # update HEAD to point to that location -- this
                        # readies the subdataset for further modification
                        lgr.info(
                            "Reset subdataset branch '%s' to %s (from %s) to "
                            "avoid a detached HEAD",
                            sub_orig_branch, target_commit[:8],
                            sub_orig_hexsha[:8])
                        branch_ref = 'refs/heads/%s' % sub_orig_branch
                        sub.update_ref(branch_ref, target_commit)
                        sub.update_ref('HEAD', branch_ref, symbolic=True)
                    else:
                        lgr.warning(
                            "%s has a detached HEAD, because the recorded "
                            "subdataset state %s has no unique ancestor with "
                            "branch '%s'",
                            sub, target_commit[:8], sub_orig_branch)

            # register the submodule as "active" in the superdataset
            # NOTE(review): placement at this level (executed also when the
            # clone already sat on the target commit) inferred from the use
            # of `section_name` -- confirm against upstream
            ds.config.set(
                '{}.active'.format(section_name),
                'true',
                reload=False, force=True, where='local',
            )
            ds.config.set(
                '{}.url'.format(section_name),
                # record the actual source URL of the successful clone
                # and not a funky prediction based on the parent ds
                # like ds.repo.update_submodule() would do (does not
                # accept a URL)
                res['source']['giturl'],
                reload=True, force=True, where='local',
            )
        yield res

    subds = Dataset(dest_path)
    if not subds.is_installed():
        lgr.debug('Desired subdataset %s did not materialize, stopping',
                  subds)
        return

    # check whether clone URL generators were involved
    cand_cfg = [rec for rec in clone_urls if rec.get('from_config', False)]
    if cand_cfg:
        # get a handle on the configuration that is specified in the
        # dataset itself (local and dataset)
        super_cfg = ConfigManager(dataset=ds, source='dataset-local')
        need_reload = False
        for rec in cand_cfg:
            # check whether any of this configuration originated from the
            # superdataset. if so, inherit the config in the new subdataset
            # clone. if not, keep things clean in order to be able to move
            # with any outside configuration change
            for c in ('datalad.get.subdataset-source-candidate-{}{}'.format(
                          rec['cost'], rec['name']),
                      'datalad.get.subdataset-source-candidate-{}'.format(
                          rec['name'])):
                if c in super_cfg.keys():
                    subds.config.set(
                        c, super_cfg.get(c), where='local', reload=False)
                    need_reload = True
                    break
        if need_reload:
            subds.config.reload(force=True)