def __call__(
        reponame,
        dataset=None,
        recursive=False,
        recursion_limit=None,
        name='github',
        existing='error',
        github_login=None,
        github_passwd=None,
        github_organization=None,
        access_protocol='https',
        publish_depends=None,
        dryrun=False):
    """Create GitHub sibling repositories for a dataset (and subdatasets).

    Gathers the target dataset (and, if `recursive`, its installed
    subdatasets), creates a matching repository for each on GitHub via
    `_make_github_repos`, and configures a sibling remote `name` in each
    local dataset pointing at the created repository.

    Parameters are passed through from the command interface; `existing`
    decides what to do for datasets that already have a sibling `name`
    configured ('error' raises unless `dryrun`, 'skip' omits the dataset,
    anything else falls through and the dataset is still processed).

    Returns
    -------
    list
        The result records from `_make_github_repos` (dataset, URL,
        already-existed flag triples), or an empty list if all datasets
        were skipped.

    Raises
    ------
    MissingExternalDependency
        If PyGitHub is not installed.
    ValueError
        If a sibling `name` already exists, existing='error', and this is
        not a dry run.
    """
    try:
        # this is an absolute leaf package, import locally to avoid
        # unnecessary dependencies
        import github as gh
    except ImportError:
        raise MissingExternalDependency(
            'PyGitHub',
            msg='GitHub-related functionality is unavailable without this package')

    # what to operate on
    ds = require_dataset(
        dataset, check_installed=True, purpose='create Github sibling')
    # gather datasets and essential info
    # dataset instance and mountpoint relative to the top
    toprocess = [(ds, '')]
    if recursive:
        for sub in ds.subdatasets(
                fulfilled=None,  # we want to report on missing dataset in here
                recursive=recursive,
                recursion_limit=recursion_limit,
                result_xfm='datasets'):
            if not sub.is_installed():
                lgr.info('Ignoring unavailable subdataset %s', sub)
                continue
            toprocess.append((sub, relpath(sub.path, start=ds.path)))

    # check for existing remote configuration
    filtered = []
    for d, mp in toprocess:
        if name in d.repo.get_remotes():
            if existing == 'error':
                msg = '{} already had a configured sibling "{}"'.format(
                    d, name)
                if dryrun:
                    lgr.error(msg)
                else:
                    raise ValueError(msg)
            elif existing == 'skip':
                continue
        # derive a per-(sub)dataset GitHub project name from the base
        # `reponame` and the mountpoint of the subdataset (if any)
        gh_reponame = '{}{}{}'.format(
            reponame,
            '-' if mp else '',
            template_fx(mp))
        filtered.append((d, gh_reponame))

    if not filtered:
        # all skipped
        return []

    # actually make it happen on Github
    rinfo = _make_github_repos(
        gh, github_login, github_passwd, github_organization,
        filtered, existing, access_protocol, dryrun)
    # lastly configure the local datasets
    for d, url, existed in rinfo:
        if not dryrun:
            # first make sure that annex doesn't touch this one
            # but respect any existing config
            ignore_var = 'remote.{}.annex-ignore'.format(name)
            if ignore_var not in d.config:
                d.config.add(ignore_var, 'true', where='local')
            Siblings()(
                'configure',
                dataset=d,
                name=name,
                url=url,
                recursive=False,
                # TODO fetch=True, maybe only if one existed already
                publish_depends=publish_depends)

    # TODO let submodule URLs point to Github (optional)
    return rinfo
def _create_dataset_sibling(
        name,
        ds,
        hierarchy_basepath,
        ssh,
        replicate_local_structure,
        ssh_url,
        target_dir,
        target_url,
        target_pushurl,
        existing,
        shared,
        group,
        publish_depends,
        publish_by_default,
        install_postupdate_hook,
        as_common_datasrc,
        annex_wanted,
        annex_group,
        annex_groupwanted,
        inherit
):
    """Create and configure a single remote (SSH) sibling for one dataset.

    Derives the remote path for `ds` relative to `hierarchy_basepath`
    (optionally replicating the local dataset layout), handles a
    pre-existing target according to `existing` ('error', 'skip',
    'replace', 'reconfigure'), initializes the remote git/annex
    repository if needed, registers the sibling `name` locally via
    `Siblings`, and adjusts remote git configuration
    (receive.denyCurrentBranch, optional post-update hook).

    Returns
    -------
    str or None
        The remote dataset path on success; None if the dataset was
        skipped (existing='skip') or remote repo initialization failed.

    Raises
    ------
    RuntimeError
        If the target path exists and existing='error'.
    ValueError
        If `existing` has an unrecognized value.
    """
    localds_path = ds.path
    ds_name = relpath(localds_path, start=hierarchy_basepath)
    if not replicate_local_structure:
        # flatten hierarchy: top dataset gets '', subdatasets get a
        # '-'-joined suffix substituted for %RELNAME
        ds_name = '' if ds_name == curdir \
            else '-{}'.format(ds_name.replace("/", "-"))
        remoteds_path = target_dir.replace(
            "%RELNAME",
            ds_name)
    else:
        # TODO: opj depends on local platform, not the remote one.
        # check how to deal with it. Does windows ssh server accept
        # posix paths? vice versa? Should planned SSH class provide
        # tools for this issue?
        # see gh-1188
        remoteds_path = normpath(opj(target_dir, ds_name))

    # construct a would-be ssh url based on the current dataset's path
    ssh_url.path = remoteds_path
    ds_sshurl = ssh_url.as_str()
    # configure dataset's git-access urls
    ds_target_url = target_url.replace('%RELNAME', ds_name) \
        if target_url else ds_sshurl
    # push, configure only if needed
    ds_target_pushurl = None
    if ds_target_url != ds_sshurl:
        # not guaranteed that we can push via the primary URL
        ds_target_pushurl = target_pushurl.replace('%RELNAME', ds_name) \
            if target_pushurl else ds_sshurl

    lgr.info("Considering to create a target dataset {0} at {1} of {2}".format(
        localds_path, remoteds_path, ssh_url.hostname))
    # Must be set to True only if exists and existing='reconfigure'
    # otherwise we might skip actions if we say existing='reconfigure'
    # but it did not even exist before
    only_reconfigure = False
    if remoteds_path != '.':
        # check if target exists
        # TODO: Is this condition valid for != '.' only?
        path_exists = True
        try:
            out, err = ssh("ls {}".format(sh_quote(remoteds_path)))
        except CommandError as e:
            if "No such file or directory" in e.stderr and \
                    remoteds_path in e.stderr:
                path_exists = False
            else:
                raise  # It's an unexpected failure here

        if path_exists:
            _msg = "Target path %s already exists." % remoteds_path
            # path might be existing but be an empty directory, which should be
            # ok to remove
            try:
                lgr.debug(
                    "Trying to rmdir %s on remote since might be an empty dir",
                    remoteds_path
                )
                # should be safe since should not remove anything unless an empty dir
                ssh("rmdir {}".format(sh_quote(remoteds_path)))
                path_exists = False
            except CommandError as e:
                # If fails to rmdir -- either contains stuff no permissions
                # TODO: fixup encode/decode dance again :-/ we should have got
                # unicode/str here by now. I guess it is the same as
                # https://github.com/ReproNim/niceman/issues/83
                # where I have reused this Runner thing
                try:
                    # ds_name is unicode which makes _msg unicode so we must be
                    # unicode-ready
                    err_str = text_type(e.stderr)
                except UnicodeDecodeError:
                    err_str = e.stderr.decode(errors='replace')
                _msg += " And it fails to rmdir (%s)." % (err_str.strip(),)

        if path_exists:
            if existing == 'error':
                raise RuntimeError(_msg)
            elif existing == 'skip':
                lgr.info(_msg + " Skipping")
                return
            elif existing == 'replace':
                lgr.info(_msg + " Replacing")
                # enable write permissions to allow removing dir
                ssh("chmod +r+w -R {}".format(sh_quote(remoteds_path)))
                # remove target at path
                ssh("rm -rf {}".format(sh_quote(remoteds_path)))
                # if we succeeded in removing it
                path_exists = False
                # Since it is gone now, git-annex also should forget about it
                remotes = ds.repo.get_remotes()
                if name in remotes:
                    # so we had this remote already, we should announce it dead
                    # XXX what if there was some kind of mismatch and this name
                    # isn't matching the actual remote UUID? should have we
                    # checked more carefully?
                    lgr.info(
                        "Announcing existing remote %s dead to annex and removing",
                        name
                    )
                    if isinstance(ds.repo, AnnexRepo):
                        ds.repo.set_remote_dead(name)
                    ds.repo.remove_remote(name)
            elif existing == 'reconfigure':
                lgr.info(_msg + " Will only reconfigure")
                only_reconfigure = True
            else:
                raise ValueError(
                    "Do not know how to handle existing={}".format(
                        repr(existing)))

        if not path_exists:
            ssh("mkdir -p {}".format(sh_quote(remoteds_path)))

    delayed_super = _DelayedSuper(ds)
    if inherit and delayed_super.super:
        if shared is None:
            # here we must analyze current_ds's super, not the super_ds
            # inherit from the setting on remote end
            shared = CreateSibling._get_ds_remote_shared_setting(
                delayed_super, name, ssh)

        if not install_postupdate_hook:
            # Even though directive from above was False due to no UI explicitly
            # requested, we were asked to inherit the setup, so we might need
            # to install the hook, if super has it on remote
            install_postupdate_hook = CreateSibling._has_active_postupdate(
                delayed_super, name, ssh)

    if group:
        # Either repository existed before or a new directory was created for it,
        # set its group to a desired one if was provided with the same chgrp
        ssh("chgrp -R {} {}".format(
            sh_quote(text_type(group)),
            sh_quote(remoteds_path)))

    # don't (re-)initialize dataset if existing == reconfigure
    if not only_reconfigure:
        # init git and possibly annex repo
        if not CreateSibling.init_remote_repo(
                remoteds_path, ssh, shared, ds,
                description=target_url):
            return

        if target_url and not is_ssh(target_url):
            # we are not coming in via SSH, hence cannot assume proper
            # setup for webserver access -> fix
            ssh('git -C {} update-server-info'.format(sh_quote(remoteds_path)))
    else:
        # TODO -- we might still want to reconfigure 'shared' setting!
        pass

    # at this point we have a remote sibling in some shape or form
    # -> add as remote
    lgr.debug("Adding the siblings")
    # TODO generator, yield the now swallowed results
    Siblings.__call__(
        'configure',
        dataset=ds,
        name=name,
        url=ds_target_url,
        pushurl=ds_target_pushurl,
        recursive=False,
        fetch=True,
        as_common_datasrc=as_common_datasrc,
        publish_by_default=publish_by_default,
        publish_depends=publish_depends,
        annex_wanted=annex_wanted,
        annex_group=annex_group,
        annex_groupwanted=annex_groupwanted,
        inherit=inherit
    )

    # check git version on remote end
    lgr.info("Adjusting remote git configuration")
    # hoisted into a local: the original queried get_git_version() up to
    # three times, and each query may involve a remote command execution
    remote_git_version = ssh.get_git_version()
    if remote_git_version and remote_git_version >= LooseVersion("2.4"):
        # allow for pushing to checked out branch
        try:
            ssh("git -C {} config receive.denyCurrentBranch updateInstead".format(
                sh_quote(remoteds_path)))
        except CommandError as e:
            lgr.error("git config failed at remote location %s.\n"
                      "You will not be able to push to checked out "
                      "branch. Error: %s", remoteds_path, exc_str(e))
    else:
        lgr.error("Git version >= 2.4 needed to configure remote."
                  " Version detected on server: %s\nSkipping configuration"
                  " of receive.denyCurrentBranch - you will not be able to"
                  " publish updates to this repository. Upgrade your git"
                  " and run with --existing=reconfigure",
                  remote_git_version)

    if install_postupdate_hook:
        # enable metadata refresh on dataset updates to publication server
        lgr.info("Enabling git post-update hook ...")
        try:
            CreateSibling.create_postupdate_hook(
                remoteds_path, ssh, ds)
        except CommandError as e:
            lgr.error("Failed to add json creation command to post update "
                      "hook.\nError: %s" % exc_str(e))

    return remoteds_path
def __call__(reponame,
             dataset=None,
             recursive=False,
             recursion_limit=None,
             name='github',
             existing='error',
             github_login=None,
             github_organization=None,
             access_protocol='https',
             publish_depends=None,
             private=False,
             dryrun=False,
             dry_run=False):
    """Create GitHub sibling repositories for a dataset (generator form).

    Validates `reponame`, collects the target dataset (and, if
    `recursive`, installed subdatasets), creates a GitHub project per
    dataset via `_make_github_repos_`, and (unless a dry run) configures
    a local sibling remote `name` for each.  Yields standard datalad
    result records throughout.

    `dryrun` is the deprecated alias of `dry_run`; using it emits a
    DeprecationWarning and is mapped onto `dry_run`.
    """
    if dryrun and not dry_run:
        # the old one is used, and not in agreement with the new one
        warnings.warn(
            "datalad-create-sibling-github's `dryrun` option is "
            "deprecated and will be removed in a future release, "
            "use the renamed `dry_run/--dry-run` option instead.",
            DeprecationWarning)
        dry_run = dryrun

    # this is an absolute leaf package, import locally to avoid
    # unnecessary dependencies
    from datalad.support.github_ import _make_github_repos_

    if reponame != normalize_reponame(reponame):
        raise ValueError(
            'Invalid name for a GitHub project: {}'.format(reponame))

    # what to operate on
    ds = require_dataset(
        dataset, check_installed=True, purpose='create GitHub sibling(s)')

    # properties blended into every yielded result record
    res_kwargs = dict(
        action='create_sibling_github [dry-run]' if dry_run else 'create_sibling_github',
        logger=lgr,
        refds=ds.path,
    )

    # gather datasets and essential info
    # dataset instance and mountpoint relative to the top
    toprocess = [ds]
    if recursive:
        for sub in ds.subdatasets(
                fulfilled=None,  # we want to report on missing dataset in here
                recursive=recursive,
                recursion_limit=recursion_limit,
                result_xfm='datasets'):
            if not sub.is_installed():
                lgr.info('Ignoring unavailable subdataset %s', sub)
                continue
            toprocess.append(sub)

    # check for existing remote configuration
    filtered = []
    for d in toprocess:
        if name in d.repo.get_remotes():
            # already configured: 'error' mode yields an error record,
            # any other `existing` mode yields 'notneeded'; either way
            # the dataset is excluded from further processing
            yield get_status_dict(
                ds=d,
                status='error' if existing == 'error' else 'notneeded',
                message=('already has a configured sibling "%s"', name),
                **res_kwargs)
            continue
        # subdatasets get a suffix derived from their relative path
        gh_reponame = reponame if d == ds else \
            '{}-{}'.format(
                reponame,
                normalize_reponame(str(d.pathobj.relative_to(ds.pathobj))))
        filtered.append((d, gh_reponame))

    if not filtered:
        # all skipped
        return

    # actually make it happen on GitHub
    for res in _make_github_repos_(
            github_login, github_organization, filtered,
            existing, access_protocol, private, dry_run):
        # blend reported results with standard properties
        res = dict(res, **res_kwargs)
        if 'message' not in res:
            res['message'] = ("Dataset sibling '%s', project at %s",
                              name, res['url'])
        # report to caller
        yield get_status_dict(**res)

        if res['status'] not in ('ok', 'notneeded'):
            # something went wrong, do not proceed
            continue

        # lastly configure the local datasets
        if not dry_run:
            extra_remote_vars = {
                # first make sure that annex doesn't touch this one
                # but respect any existing config
                'annex-ignore': 'true',
                # first push should separately push active branch first
                # to overcome github issue of choosing "default" branch
                # alphabetically if its name does not match the default
                # branch for the user (or organization) which now defaults
                # to "main"
                'datalad-push-default-first': 'true'
            }
            # NOTE(review): `d` below is the leftover variable of the
            # `for d in toprocess` loop above, i.e. always the *last*
            # inspected dataset -- presumably this should be the dataset
            # the current `res` record refers to; verify against
            # `_make_github_repos_`'s result schema before relying on
            # this in the multi-dataset (recursive) case.
            for var_name, var_value in extra_remote_vars.items():
                var = 'remote.{}.{}'.format(name, var_name)
                if var not in d.config:
                    d.config.add(var, var_value, where='local')
            yield from Siblings()(
                'configure',
                dataset=d,
                name=name,
                url=res['url'],
                recursive=False,
                # TODO fetch=True, maybe only if one existed already
                publish_depends=publish_depends,
                result_renderer='disabled')