def test_create_osf_export(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    create_results = ds.create_sibling_osf(
        title="CI dl-create",
        # do not create a git-remote
        mode="exportonly")
    assert_result_count(create_results, 1,
                        status='ok', type='dataset', name='osf-storage',
                        path=ds.path)
    # if we got here, we created something at OSF;
    # make sure we clean up afterwards
    try:
        # for now just run an export and make sure it doesn't fail
        ds.repo.call_git(['annex', 'export', 'HEAD', '--to', 'osf-storage'])
    finally:
        # clean remote end:
        cred = get_credentials(allow_interactive=False)
        osf = OSF(**cred)
        delete_node(osf.session, create_results[0]['id'])
def __call__(title, name="osf", dataset=None, mode="annex"): ds = require_dataset(dataset, purpose="create OSF remote", check_installed=True) # we need an annex if not isinstance(ds.repo, AnnexRepo): yield get_status_dict(action="create-sibling-osf", type="dataset", status="impossible", message="dataset has no annex") return # NOTES: # - we prob. should check osf-special-remote availability upfront to # fail early # - publish-depends option? # - (try to) detect github/gitlab/bitbucket to suggest linking it on # OSF and configure publish dependency # -> prob. overkill; just make it clear in the doc # - add --recursive option # - recursive won't work easily. Need to think that through. # - would need a naming scheme for subdatasets # - flat on OSF or a tree? # - how do we detect something is there already, so we can skip # rather than duplicate (with a new name)? # osf-type-special-remote sufficient to decide it's not needed? # - adapt to conclusions in issue #30 # -> create those subcomponents # - results need to report URL for created projects suitable for datalad # output formatting! # -> result_renderer # -> needs to ne returned by create_project # - option: Make public! cred = get_credentials(allow_interactive=True) osf = OSF(**cred) proj_id, proj_url = create_project(osf_session=osf.session, title=title) yield get_status_dict(action="create-project-osf", type="dataset", url=proj_url, id=proj_id, status="ok") init_opts = [ "encryption=none", "type=external", "externaltype=osf", "autoenable=true", "project={}".format(proj_id) ] if mode == "export": init_opts += ["exporttree=yes"] ds.repo.init_remote(name, options=init_opts) # TODO: add special remote name to result? # need to check w/ datalad-siblings conventions yield get_status_dict(action="add-sibling-osf", type="dataset", status="ok")
def prepare(self):
    """"""
    node_id = self.annex.getconfig('node')
    if not node_id:
        # fall back on outdated 'project' parameter, which could be
        # just the node ID or a full URL to a project
        node_id = posixpath.basename(
            urlparse(self.annex.getconfig('project')).path.strip(
                posixpath.sep))
    if not node_id:
        raise RemoteError('Could not determine OSF node ID')

    try:
        # make use of DataLad's credential manager for a more convenient
        # out-of-the-box behavior
        from datalad_osf.utils import get_credentials
        # we must stay non-interactive, because this is running inside
        # git-annex's special remote protocol
        creds = get_credentials(allow_interactive=False)
    except ImportError as e:
        # whenever anything goes wrong here, stay calm and fall back
        # on envvars.
        # we want this special remote to be fully functional without
        # datalad
        creds = dict(
            username=os.environ.get('OSF_USERNAME', None),
            password=os.environ.get('OSF_PASSWORD', None),
            token=os.environ.get('OSF_TOKEN', None),
        )
    # next one just sets up the stage, no requests performed yet, hence
    # no error checking needed
    # supply both auth credentials, so osfclient can fall back on user/pass
    # if needed
    osf = OSF(**creds)
    # next one performs initial auth
    try:
        self.node = osf.project(node_id)
    except Exception as e:
        # we need to raise RemoteError() such that PREPARE-FAILURE
        # is reported, sadly that doesn't give users any clue
        # TODO support datalad logging here
        raise RemoteError('Failed to obtain OSF node handle: {}'.format(e))

    # which storage to use, defaults to 'osfstorage'
    # TODO a node could have more than one? Make parameter to select?
    self.storage = self.node.storage()
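# Illustration (not from the source above): a minimal sketch of how the
# 'node' config consumed by prepare() gets put in place when the special
# remote is initialized. It reuses AnnexRepo.init_remote() as the
# sibling-creation code in this section does; the repo path, remote name
# 'osf-storage', and node ID are made up for the example.
from datalad.support.annexrepo import AnnexRepo

def init_osf_special_remote(repo_path, node_id):
    repo = AnnexRepo(repo_path)
    # same option set assembled by the create_sibling_osf code above;
    # prepare() later reads 'node' back via self.annex.getconfig('node')
    repo.init_remote(
        'osf-storage',
        options=['encryption=none', 'type=external', 'externaltype=osf',
                 'autoenable=true', 'node={}'.format(node_id)])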
def test_create_osf_simple(path):
    ds = Dataset(path).create(force=True)
    ds.save()

    file1 = Path('ds') / "file1.txt"

    create_results = ds.create_sibling_osf(name="osf")

    assert_result_count(create_results, 2, status='ok')
    assert_result_count(create_results, 1,
                        status='ok', type='dataset', name="osf-storage",
                        path=ds.path)
    assert_result_count(create_results, 1,
                        status='ok', type='sibling', name="osf",
                        path=ds.path)

    # if we got here, we created something at OSF;
    # make sure we clean up afterwards
    try:
        # special remote is configured:
        remote_log = ds.repo.call_git(
            ['cat-file', 'blob', 'git-annex:remote.log'])
        assert_in("node={}".format(create_results[0]['id']), remote_log)

        # copy files over
        ds.repo.copy_to('.', "osf-storage")
        whereis = ds.repo.whereis(str(file1))
        here = ds.config.get("annex.uuid")
        # files should be 'here' and on remote end:
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)

        # drop content here
        ds.drop('.')
        whereis = ds.repo.whereis(str(file1))
        # now on remote end only
        assert_equal(len(whereis), 1)
        assert_not_in(here, whereis)

        # and get content again from remote:
        ds.get('.')
        whereis = ds.repo.whereis(str(file1))
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)
    finally:
        # clean remote end:
        cred = get_credentials(allow_interactive=False)
        osf = OSF(**cred)
        delete_node(osf.session, create_results[0]['id'])
def prepare(self):
    """"""
    project_id = posixpath.basename(
        urlparse(self.annex.getconfig('project')).path.strip(
            posixpath.sep))
    # supply both auth credentials, so osfclient can fall back on user/pass
    # if needed
    osf = OSF(
        username=os.environ.get('OSF_USERNAME', None),
        password=os.environ.get('OSF_PASSWORD', None),
        token=os.environ.get('OSF_TOKEN', None),
    )  # TODO: error checking etc
    # next one performs initial auth
    self.project = osf.project(project_id)  # errors ??

    # which storage to use, defaults to 'osfstorage'
    # TODO a project could have more than one? Make parameter to select?
    self.storage = self.project.storage()
def with_project(f, osf_session=None, title=None, category="project"):
    creds = setup_credentials()
    # supply all credentials, so osfclient can fall back on user/pass
    # if needed
    osf = OSF(**creds)

    @wraps(f)
    def new_func(*args, **kwargs):
        proj_id, proj_url = create_project(
            osf.session, title, category=category)
        try:
            return f(*(args + (proj_id,)), **kwargs)
        finally:
            delete_project(osf.session, proj_id)

    return new_func
def with_node(f, osf_session=None, title=None, category="data"):
    # we don't want the test hanging, no interaction
    creds = get_credentials(allow_interactive=False)
    # supply all credentials, so osfclient can fall back on user/pass
    # if needed
    osf = OSF(**creds)

    @wraps(f)
    def new_func(*args, **kwargs):
        node_id, proj_url = create_node(
            osf.session,
            'Temporary DataLad CI project: {}'.format(title),
            category=category)
        try:
            return f(*(args + (node_id,)), **kwargs)
        finally:
            delete_node(osf.session, node_id)

    return new_func
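# Usage sketch (illustration, not from the source): with the signature above,
# with_node() is applied directly to a test function, which then receives the
# fresh node's ID as an extra positional argument; the node is deleted
# afterwards even if the test fails. The test name and title are hypothetical.
def test_node_roundtrip(node_id):
    # exercise the temporary OSF node here
    assert node_id

test_node_roundtrip = with_node(test_node_roundtrip, title='roundtrip')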
def _get_osf_api(self):
    """"""
    try:
        # make use of DataLad's credential manager for a more convenient
        # out-of-the-box behavior
        from datalad_osf.utils import get_credentials
        # we should be able to allow interactive
        creds = get_credentials(allow_interactive=True)
    except ImportError as e:
        # whenever anything goes wrong here, stay calm and fall back
        # on envvars.
        # we want this special remote to be fully functional without
        # datalad
        creds = dict(
            username=os.environ.get('OSF_USERNAME', None),
            password=os.environ.get('OSF_PASSWORD', None),
            token=os.environ.get('OSF_TOKEN', None),
        )
    # next one just sets up the stage, no requests performed yet, hence
    # no error checking needed
    # supply both auth credentials, so osfclient can fall back on user/pass
    # if needed
    return OSF(**creds)
def __call__(method="token", reset=False): auth = None cred_spec = [] if method == 'token': cred_spec = dict(token='token') auth = Token( name='https://osf.io', url='https://osf.io/settings/tokens', ) elif method == 'userpassword': cred_spec = dict(user='******', password='******') auth = UserPassword( name='https://osf.io', url='https://osf.io/settings/account', ) else: raise ValueError( 'Unknown authentication method: {}'.format(method)) if reset and auth.is_known: auth.delete() cred = {v: auth().get(k, None) for k, v in cred_spec.items()} # now verify that the credentials work by querying the # logged in user osf = OSF(**cred) try: req = osf.session.get('https://api.osf.io/v2/users/me/') req.raise_for_status() except UnauthorizedException: auth.delete() yield dict( action='osf_credentials', status='error', message='Invalid credentials', path=None, ) return except Exception as e: yield dict( action='osf_credentials', status='impossible', message='Could not verify credentials, ' 'please try again: {}'.format(exc_str(e)), # needed to pacify DataLad 0.13.0 and earlier path=None, ) return # if we get here auth has worked fine # get some attributes for an informative message attrs = req.json().get('data', {}).get('attributes', {}) yield dict( action='osf_credentials', status='ok', message='authenticated{}{}{}'.format( ' as ' if any( attrs.get(k, None) for k in ('email', 'full_name')) else '', attrs.get('full_name', ''), ' <{}>'.format(attrs['email']) if attrs.get('email', None) else ''), # needed to pacify DataLad 0.13.0 and earlier path=None, # report effective credentials **cred, )
def __call__(title=None,
             name="osf",
             storage_name=None,
             dataset=None,
             mode="annex",
             existing='error',
             trust_level=None,
             tags=None,
             public=False,
             category='data',
             description=None,
             ):
    ds = require_dataset(dataset,
                         purpose="create OSF remote",
                         check_installed=True)
    res_kwargs = dict(
        ds=ds,
        action="create-sibling-osf",
        logger=lgr,
    )
    # we need an annex
    if not isinstance(ds.repo, AnnexRepo):
        yield get_status_dict(
            type="dataset",
            status="impossible",
            message="dataset has no annex",
            **res_kwargs)
        return

    # NOTES:
    # - we prob. should check osf-special-remote availability upfront to
    #   fail early
    # - add --recursive option
    #   - recursive won't work easily. Need to think that through.
    #   - would need a naming scheme for subdatasets
    #   - flat on OSF or a tree?
    #   - how do we detect something is there already, so we can skip
    #     rather than duplicate (with a new name)?
    #     osf-type-special-remote sufficient to decide it's not needed?
    #   - adapt to conclusions in issue #30
    #     -> create those subcomponents
    # - results need to report URL for created projects suitable for datalad
    #   output formatting!
    #   -> result_renderer
    #   -> needs to be returned by create_node

    if not storage_name:
        storage_name = "{}-storage".format(name)

    sibling_conflicts = sibling_exists(
        ds, [name, storage_name],
        # TODO pass through
        recursive=False, recursion_limit=None,
        # fail fast, if error is desired
        exhaustive=existing == 'error',
    )
    if existing == 'error' and sibling_conflicts:
        # we only asked for one
        conflict = sibling_conflicts[0]
        yield get_status_dict(
            status='error',
            message=(
                "a sibling '%s' is already configured in dataset %s",
                conflict[1], conflict[0]),
            **res_kwargs,
        )
        return

    if title is None:
        # use dataset root basename
        title = ds.pathobj.name

    tags = ensure_list(tags)
    if 'DataLad dataset' not in tags:
        tags.append('DataLad dataset')
    if ds.id and ds.id not in tags:
        tags.append(ds.id)

    if not description:
        description = \
            "This component was built from a DataLad dataset using the " \
            "datalad-osf extension " \
            "(https://github.com/datalad/datalad-osf)."
        if mode != 'exportonly':
            description += \
                " With this extension installed, this component can be " \
                "git or datalad cloned from an 'osf://ID' URL, where " \
                "'ID' is the OSF node ID shown in the OSF HTTP " \
                "URL, e.g. https://osf.io/q8xnk/ can be cloned from " \
                "osf://q8xnk"

    cred = get_credentials(allow_interactive=True)
    osf = OSF(**cred)
    node_id, node_url = create_node(
        osf_session=osf.session,
        title=title,
        category=category,
        tags=tags if tags else None,
        public=EnsureBool()(public),
        description=description,
    )
    if mode != 'gitonly':
        init_opts = ["encryption=none",
                     "type=external",
                     "externaltype=osf",
                     "autoenable=true",
                     "node={}".format(node_id)]

        if mode in ("export", "exportonly"):
            init_opts += ["exporttree=yes"]

        ds.repo.init_remote(storage_name, options=init_opts)
        if trust_level:
            ds.repo.call_git(['annex', trust_level, storage_name])

        yield get_status_dict(
            type="dataset",
            url=node_url,
            id=node_id,
            name=storage_name,
            status="ok",
            **res_kwargs
        )

    if mode == 'exportonly':
        return

    ds.config.set(
        'remote.{}.annex-ignore'.format(name), 'true',
        where='local')

    yield from ds.siblings(
        # use configure, not add, to not trip over the config that
        # we just made
        action='configure',
        name=name,
        url='osf://{}'.format(node_id),
        fetch=False,
        publish_depends=storage_name if mode != 'gitonly' else None,
        recursive=False,
        result_renderer=None,
    )
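# End-to-end sketch (illustration; the dataset path and title are made up):
# create a dataset, attach the OSF sibling pair produced by the command above,
# and push git history plus annexed data. Because the 'osf' sibling
# publish-depends on 'osf-storage', a single push serves both.
from datalad.api import Dataset

ds = Dataset('/tmp/demo-ds').create()
ds.create_sibling_osf(title='demo-ds', name='osf', mode='annex')
ds.push(to='osf')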