Example #1
def test_create_osf_export(path):

    ds = Dataset(path).create(force=True)
    ds.save()

    create_results = ds.create_sibling_osf(
        title="CI dl-create",
        # do not create a git-remote
        mode="exportonly")

    assert_result_count(create_results,
                        1,
                        status='ok',
                        type='dataset',
                        name='osf-storage',
                        path=ds.path)

    # if we got here, we created something at OSF;
    # make sure we clean up afterwards
    try:

        # for now just run an export and make sure it doesn't fail
        ds.repo.call_git(['annex', 'export', 'HEAD', '--to', 'osf-storage'])

    finally:
        # clean remote end:
        cred = get_credentials(allow_interactive=False)
        osf = OSF(**cred)
        delete_node(osf.session, create_results[0]['id'])
Example #2
    def __call__(title, name="osf", dataset=None, mode="annex"):
        ds = require_dataset(dataset,
                             purpose="create OSF remote",
                             check_installed=True)
        # we need an annex
        if not isinstance(ds.repo, AnnexRepo):
            yield get_status_dict(action="create-sibling-osf",
                                  type="dataset",
                                  status="impossible",
                                  message="dataset has no annex")
            return

        # NOTES:
        # - we prob. should check osf-special-remote availability upfront to
        #   fail early
        # - publish-depends option?
        # - (try to) detect github/gitlab/bitbucket to suggest linking it on
        #   OSF and configure publish dependency
        #   -> prob. overkill; just make it clear in the doc
        # - add --recursive option
        #       - recursive won't work easily. Need to think that through.
        #       - would need a naming scheme for subdatasets
        #       - flat on OSF or a tree?
        #       - how do we detect something is there already, so we can skip
        #         rather than duplicate (with a new name)?
        #         osf-type-special-remote sufficient to decide it's not needed?
        # - adapt to conclusions in issue #30
        #   -> create those subcomponents
        # - results need to report URL for created projects suitable for datalad
        #   output formatting!
        #   -> result_renderer
        #   -> needs to be returned by create_project

        # - option: Make public!

        cred = get_credentials(allow_interactive=True)
        osf = OSF(**cred)
        proj_id, proj_url = create_project(osf_session=osf.session,
                                           title=title)
        yield get_status_dict(action="create-project-osf",
                              type="dataset",
                              url=proj_url,
                              id=proj_id,
                              status="ok")

        init_opts = [
            "encryption=none", "type=external", "externaltype=osf",
            "autoenable=true", "project={}".format(proj_id)
        ]

        if mode == "export":
            init_opts += ["exporttree=yes"]

        ds.repo.init_remote(name, options=init_opts)
        # TODO: add special remote name to result?
        #       need to check w/ datalad-siblings conventions
        yield get_status_dict(action="add-sibling-osf",
                              type="dataset",
                              status="ok")
Example #3
    def prepare(self):
        """"""
        node_id = self.annex.getconfig('node')
        if not node_id:
            # fall back on outdated 'project' parameter, which could be
            # just the node ID or a full URL to a project
            node_id = posixpath.basename(
                urlparse(self.annex.getconfig('project')).path.strip(
                    posixpath.sep))

        if not node_id:
            raise RemoteError('Could not determine OSF node ID')

        try:
            # make use of DataLad's credential manager for a more convenient
            # out-of-the-box behavior
            from datalad_osf.utils import get_credentials
            # we must stay non-interactive, because this is running inside
            # git-annex's special remote protocol
            creds = get_credentials(allow_interactive=False)
        except ImportError:
            # if DataLad is not available, stay calm and fall back
            # on envvars.
            # we want this special remote to be fully functional without
            # datalad
            creds = dict(
                username=os.environ.get('OSF_USERNAME', None),
                password=os.environ.get('OSF_PASSWORD', None),
                token=os.environ.get('OSF_TOKEN', None),
            )
        # next one just sets up the stage, no requests performed yet, hence
        # no error checking needed
        # supply both auth credentials, so osfclient can fall back on user/pass
        # if needed
        osf = OSF(**creds)
        # next one performs initial auth
        try:
            self.node = osf.project(node_id)
        except Exception as e:
            # we need to raise RemoteError() such that PREPARE-FAILURE
            # is reported, sadly that doesn't give users any clue
            # TODO support datalad logging here
            raise RemoteError('Failed to obtain OSF node handle: {}'.format(e))
        # which storage to use, defaults to 'osfstorage'
        # TODO a node could have more than one? Make parameter to select?
        self.storage = self.node.storage()
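
The fallback above means the special remote accepts either the current node parameter or the legacy project parameter, which may even be a full project URL; urlparse plus posixpath.basename reduce it to the bare node ID. A sketch of the two equivalent configurations (node ID hypothetical, mirroring the initremote options used by create_sibling_osf later on this page):

# current form, as written by create_sibling_osf
ds.repo.init_remote("osf-storage", options=[
    "type=external", "externaltype=osf", "encryption=none",
    "autoenable=true", "node=q8xnk"])

# legacy form still understood by prepare(); a bare ID or a full URL:
#   project=q8xnk    or    project=https://osf.io/q8xnk/
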
Example #4
def test_create_osf_simple(path):

    ds = Dataset(path).create(force=True)
    ds.save()

    file1 = Path('ds') / "file1.txt"

    create_results = ds.create_sibling_osf(name="osf")

    assert_result_count(create_results, 2, status='ok')
    assert_result_count(create_results,
                        1,
                        status='ok',
                        type='dataset',
                        name="osf-storage",
                        path=ds.path)
    assert_result_count(create_results,
                        1,
                        status='ok',
                        type='sibling',
                        name="osf",
                        path=ds.path)

    # if we got here, we created something at OSF;
    # make sure we clean up afterwards
    try:
        # special remote is configured:
        remote_log = ds.repo.call_git(
            ['cat-file', 'blob', 'git-annex:remote.log'])
        assert_in("node={}".format(create_results[0]['id']), remote_log)

        # copy files over
        ds.repo.copy_to('.', "osf-storage")
        whereis = ds.repo.whereis(str(file1))
        here = ds.config.get("annex.uuid")
        # files should be 'here' and on remote end:
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)

        # drop content here
        ds.drop('.')
        whereis = ds.repo.whereis(str(file1))
        # now on remote end only
        assert_equal(len(whereis), 1)
        assert_not_in(here, whereis)

        # and get content again from remote:
        ds.get('.')
        whereis = ds.repo.whereis(str(file1))
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)
    finally:
        # clean remote end:
        cred = get_credentials(allow_interactive=False)
        osf = OSF(**cred)
        delete_node(osf.session, create_results[0]['id'])
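
The same round trip is available through user-facing DataLad commands; a rough, hedged equivalent of the test body above (sibling name as in the test, behavior assumed rather than quoted from the package docs):

ds.push(to='osf-storage')   # ~ ds.repo.copy_to('.', 'osf-storage')
ds.drop('.')                # content now only on the OSF end
ds.get('.')                 # fetch it back from the special remote
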
Example #5
    def prepare(self):
        """"""
        project_id = posixpath.basename(
            urlparse(self.annex.getconfig('project')).path.strip(
                posixpath.sep))

        # supply both auth credentials, so osfclient can fall back on user/pass
        # if needed
        osf = OSF(
            username=os.environ.get('OSF_USERNAME', None),
            password=os.environ.get('OSF_PASSWORD', None),
            token=os.environ.get('OSF_TOKEN', None),
        )  # TODO: error checking etc
        # next one performs initial auth
        self.project = osf.project(project_id)  # errors ??

        # which storage to use, defaults to 'osfstorage'
        # TODO a project could have more than one? Make parameter to select?
        self.storage = self.project.storage()
Example #6
def with_project(f, osf_session=None, title=None, category="project"):
    creds = setup_credentials()
    # supply all credentials, so osfclient can fall back on user/pass
    # if needed
    osf = OSF(**creds)

    @wraps(f)
    def new_func(*args, **kwargs):
        proj_id, proj_url = create_project(osf.session,
                                           title,
                                           category=category)
        try:
            return f(*(args + (proj_id, )), **kwargs)
        finally:
            delete_project(osf.session, proj_id)

    return new_func
Example #7
def with_node(f, osf_session=None, title=None, category="data"):
    # we don't want the test hanging, no interaction
    creds = get_credentials(allow_interactive=False)
    # supply all credentials, so osfclient can fall back on user/pass
    # if needed
    osf = OSF(**creds)

    @wraps(f)
    def new_func(*args, **kwargs):
        node_id, proj_url = create_node(
            osf.session,
            'Temporary DataLad CI project: {}'.format(title),
            category=category)
        try:
            return f(*(args + (node_id, )), **kwargs)
        finally:
            delete_node(osf.session, node_id)

    return new_func
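
Both decorators follow the same pattern: create a throwaway OSF node for the duration of a test and guarantee its deletion afterwards. Assuming they are wrapped with DataLad's optional_args helper so they accept keyword arguments (not shown in the snippets), usage would look roughly like this (import path assumed):

from datalad_osf.utils import with_node  # assumed import path

@with_node(title='test-something')
def test_something(node_id):
    # the test receives the ID of a temporary OSF node as its last
    # positional argument; the node is deleted when the test returns
    ...

with_project from Example #6 is used analogously, passing a project ID instead.
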
Example #8
    def _get_osf_api(self):
        """"""
        try:
            # make use of DataLad's credential manager for a more convenient
            # out-of-the-box behavior
            from datalad_osf.utils import get_credentials
            # we should be able to allow interactive
            creds = get_credentials(allow_interactive=True)
        except ImportError:
            # if DataLad is not available, stay calm and fall back
            # on envvars.
            # we want this special remote to be fully functional without
            # datalad
            creds = dict(
                username=os.environ.get('OSF_USERNAME', None),
                password=os.environ.get('OSF_PASSWORD', None),
                token=os.environ.get('OSF_TOKEN', None),
            )
        # next one just sets up the stage, no requests performed yet, hence
        # no error checking needed
        # supply both auth credentials, so osfclient can fall back on user/pass
        # if needed
        return OSF(**creds)
Example #9
    def __call__(method="token", reset=False):
        auth = None
        cred_spec = []
        if method == 'token':
            cred_spec = dict(token='token')
            auth = Token(
                name='https://osf.io',
                url='https://osf.io/settings/tokens',
            )
        elif method == 'userpassword':
            cred_spec = dict(user='username', password='password')
            auth = UserPassword(
                name='https://osf.io',
                url='https://osf.io/settings/account',
            )
        else:
            raise ValueError(
                'Unknown authentication method: {}'.format(method))
        if reset and auth.is_known:
            auth.delete()
        cred = {v: auth().get(k, None) for k, v in cred_spec.items()}

        # now verify that the credentials work by querying the
        # logged in user
        osf = OSF(**cred)
        try:
            req = osf.session.get('https://api.osf.io/v2/users/me/')
            req.raise_for_status()
        except UnauthorizedException:
            auth.delete()
            yield dict(
                action='osf_credentials',
                status='error',
                message='Invalid credentials',
                path=None,
            )
            return
        except Exception as e:
            yield dict(
                action='osf_credentials',
                status='impossible',
                message='Could not verify credentials, '
                'please try again: {}'.format(exc_str(e)),
                # needed to pacify DataLad 0.13.0 and earlier
                path=None,
            )
            return
        # if we get here auth has worked fine
        # get some attributes for an informative message
        attrs = req.json().get('data', {}).get('attributes', {})
        yield dict(
            action='osf_credentials',
            status='ok',
            message='authenticated{}{}{}'.format(
                ' as ' if any(
                    attrs.get(k, None)
                    for k in ('email', 'full_name')) else '',
                attrs.get('full_name', ''), ' <{}>'.format(attrs['email'])
                if attrs.get('email', None) else ''),
            # needed to pacify DataLad 0.13.0 and earlier
            path=None,
            # report effective credentials
            **cred,
        )
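
A sketch of how this credential helper would be invoked, assuming it is exposed as osf_credentials in the DataLad Python API (mirroring a datalad osf-credentials CLI command):

import datalad.api as dl

# prompt for (or validate already stored) token credentials
dl.osf_credentials(method='token')
# discard stored credentials and enter fresh ones
dl.osf_credentials(method='token', reset=True)
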
Example #10
    def __call__(title=None,
                 name="osf",
                 storage_name=None,
                 dataset=None,
                 mode="annex",
                 existing='error',
                 trust_level=None,
                 tags=None,
                 public=False,
                 category='data',
                 description=None,
                 ):
        ds = require_dataset(dataset,
                             purpose="create OSF remote",
                             check_installed=True)
        res_kwargs = dict(
            ds=ds,
            action="create-sibling-osf",
            logger=lgr,
        )
        # we need an annex
        if not isinstance(ds.repo, AnnexRepo):
            yield get_status_dict(
                type="dataset",
                status="impossible",
                message="dataset has no annex",
                **res_kwargs)
            return

        # NOTES:
        # - we prob. should check osf-special-remote availability upfront to
        #   fail early
        # - add --recursive option
        #       - recursive won't work easily. Need to think that through.
        #       - would need a naming scheme for subdatasets
        #       - flat on OSF or a tree?
        #       - how do we detect something is there already, so we can skip
        #         rather than duplicate (with a new name)?
        #         osf-type-special-remote sufficient to decide it's not needed?
        # - adapt to conclusions in issue #30
        #   -> create those subcomponents
        # - results need to report URL for created projects suitable for datalad
        #   output formatting!
        #   -> result_renderer
        #   -> needs to be returned by create_node

        if not storage_name:
            storage_name = "{}-storage".format(name)

        sibling_conflicts = sibling_exists(
            ds, [name, storage_name],
            # TODO pass through
            recursive=False, recursion_limit=None,
            # fail fast, if error is desired
            exhaustive=existing == 'error',
        )
        if existing == 'error' and sibling_conflicts:
            # we only asked for one
            conflict = sibling_conflicts[0]
            yield get_status_dict(
                status='error',
                message=(
                    "a sibling '%s' is already configured in dataset %s",
                    conflict[1], conflict[0]),
                **res_kwargs,
            )
            return

        if title is None:
            # use dataset root basename
            title = ds.pathobj.name

        tags = ensure_list(tags)
        if 'DataLad dataset' not in tags:
            tags.append('DataLad dataset')
        if ds.id and ds.id not in tags:
            tags.append(ds.id)

        if not description:
            description = \
                "This component was built from a DataLad dataset using the " \
                "datalad-osf extension " \
                "(https://github.com/datalad/datalad-osf)."
            if mode != 'exportonly':
                description += \
                    " With this extension installed, this component can be " \
                    "git or datalad cloned from a 'osf://ID' URL, where " \
                    "'ID' is the OSF node ID that shown in the OSF HTTP " \
                    "URL, e.g. https://osf.io/q8xnk/ can be cloned from " \
                    "osf://q8xnk"
        cred = get_credentials(allow_interactive=True)
        osf = OSF(**cred)
        node_id, node_url = create_node(
            osf_session=osf.session,
            title=title,
            category=category,
            tags=tags if tags else None,
            public=EnsureBool()(public),
            description=description,
        )
        if mode != 'gitonly':
            init_opts = ["encryption=none",
                         "type=external",
                         "externaltype=osf",
                         "autoenable=true",
                         "node={}".format(node_id)]

            if mode in ("export", "exportonly"):
                init_opts += ["exporttree=yes"]

            ds.repo.init_remote(storage_name, options=init_opts)
            if trust_level:
                ds.repo.call_git(['annex', trust_level, storage_name])

            yield get_status_dict(
                type="dataset",
                url=node_url,
                id=node_id,
                name=storage_name,
                status="ok",
                **res_kwargs
            )

        if mode == 'exportonly':
            return

        ds.config.set(
            'remote.{}.annex-ignore'.format(name), 'true',
            where='local')
        yield from ds.siblings(
            # use configure, not add, to not trip over the config that
            # we just made
            action='configure',
            name=name,
            url='osf://{}'.format(node_id),
            fetch=False,
            publish_depends=storage_name if mode != 'gitonly' else None,
            recursive=False,
            result_renderer=None,
        )
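
Putting it together, a minimal end-to-end sketch for this latest variant (hedged: dataset path and title are hypothetical, and OSF credentials must already be configured):

from datalad.api import Dataset

ds = Dataset('/tmp/example-ds')
ds.create_sibling_osf(title='example', name='osf', mode='annex')
# 'osf' is the git remote and 'osf-storage' its publish dependency,
# so a single push publishes both history and annexed data
ds.push(to='osf')
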