Example #1
    def __call__(self, remote=False):
        """
        Parameters
        ----------
        remote: bool
            If True, list handles from registered remote collections only.
            Otherwise list locally installed handles instead.

        Returns
        -------
        list of Handle
        """

        local_master = get_datalad_master()
        handle_list = list()
        if remote:
            for remote_branch in local_master.git_get_remote_branches():
                if not remote_branch.endswith('/master'): # for now only those
                    continue
                for h in CollectionRepoBackend(
                        local_master, branch=remote_branch).get_handles():
                    handle_list.append(Handle(
                        CollectionRepoHandleBackend(local_master, key=h,
                                                    branch=remote_branch)))
                    remote_name = '/'.join(remote_branch.split('/')[:-1])
                    print("%s/%s" % (remote_name, h))
        else:
            for handle in local_master.get_handle_list():
                handle_list.append(Handle(
                    CollectionRepoHandleBackend(local_master, handle)))
                print(handle)

        return handle_list
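
The method prints each handle as it goes and also returns the corresponding Handle objects. Assuming this __call__ is exposed through datalad.api as list_handles (the doctest in Example #15 imports it under that name; the exact wiring is an assumption here), a minimal usage sketch:

    # Hedged sketch: list_handles and the .name attribute follow the doctest
    # in Example #15; remote=True limits the listing to handles from
    # registered remote collections, as the docstring above describes.
    from datalad.api import list_handles

    local_handles = list_handles()              # locally installed handles
    remote_handles = list_handles(remote=True)  # handles from remote collections
    print([h.name for h in local_handles])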
Example #2
    def __call__(self, key=curdir):

        # TODO: use name of local master, instead of --all option!

        local_master = get_datalad_master()

        if key == curdir:
            try:
                repo = get_repo_instance()
            except RuntimeError:
                # Not inside repo => update master
                repo = local_master
        elif exists(key):
            try:
                repo = get_repo_instance(path=key)
            except RuntimeError as e:
                # No valid repository at given path
                lgr.error(str(e))
                return -1
        else:
            # if it's not an existing path, try treating it as a name:

            if key in local_master.git_get_remotes():
                # it's a registered collection's name:
                path = CollectionRepoBackend(local_master, key).url
                if exists(path):
                    try:
                        repo = CollectionRepo(path)
                    except RuntimeError as e:
                        # Collection found by its name, but invalid repository
                        lgr.error("Collection '%s': %s" % (key, str(e)))
                        return -1
                else:
                    lgr.error("Collection '%s': path %s doesn't exist." %
                              (key, path))
                    return -1
            elif key in local_master.get_handle_list():
                # it's a handle's name:
                path = CollectionRepoHandleBackend(local_master, key).url
                if exists(path):
                    try:
                        repo = HandleRepo(path)
                    except RuntimeError as e:
                        # Handle found by its name, but invalid repository
                        lgr.error("Handle '%s': %s" % (key, str(e)))
                        return -1
                else:
                    lgr.error("Handle '%s': path %s doesn't exist." %
                              (key, path))
                    return -1

            else:
                lgr.error("'%s' is neither a known collection nor a known "
                          "handle." % key)
                return -1

        # We have a repo instance from above: update!
        for remote in repo.git_get_remotes():
            repo.git_fetch(remote)
Example #3
    def __call__(self, handle, collection, name=None):
        """
        Returns
        -------
        Handle
        """

        local_master = get_datalad_master()

        if isdir(abspath(expandvars(expanduser(handle)))):
            h_path = abspath(expandvars(expanduser(handle)))
            handle_repo = HandleRepo(h_path, create=False)
        elif handle in local_master.get_handle_list():
            h_path = urlparse(CollectionRepoHandleBackend(repo=local_master,
                                                          key=handle).url).path
            if not isdir(h_path):
                raise RuntimeError("Invalid path to handle '%s':\n%s" %
                                   (handle, h_path))
            handle_repo = HandleRepo(h_path, create=False)

        elif urlparse(handle).scheme != '':  # rudimentary plausibility check for now
            # treat as a remote annex
            handle_repo = handle
        else:
            raise RuntimeError("Unknown handle '%s'." % handle)

        if isdir(abspath(expandvars(expanduser(collection)))):
            c_path = abspath(expandvars(expanduser(collection)))
        elif collection in local_master.git_get_remotes():
            c_path = urlparse(local_master.git_get_remote_url(collection)).path
            if not isdir(c_path):
                raise RuntimeError("Invalid path to collection '%s':\n%s" %
                                   (collection, c_path))
        else:
            raise RuntimeError("Unknown collection '%s'." % collection)

        collection_repo = CollectionRepo(c_path, create=False)
        collection_repo.add_handle(handle_repo, name=name)

        # get handle's metadata, if there's any:
        if isinstance(handle_repo, HandleRepo) and \
                exists(opj(handle_repo.path, HANDLE_META_DIR,
                           REPO_STD_META_FILE)):
            collection_repo.import_metadata_to_handle(CustomImporter,
                                                      key=name if name is not None else handle_repo.name,
                                                      files=opj(
                                                          handle_repo.path,
                                                          HANDLE_META_DIR))

        # TODO: More sophisticated: Check whether the collection is registered.
        # Might be a different name than collection_repo.name or not at all.
        local_master.git_fetch(collection_repo.name)

        return Handle(CollectionRepoHandleBackend(collection_repo,
                                                  name if name is not None
                                                  else handle_repo.name))
Example #4
    def __call__(self, format, path, subject=None, handle=None):
        """
        Returns
        -------
        Handle or Collection
        """

        if len(path) == 1:
            if exists(path[0]) and isdir(path[0]):
                path = path[0]
            else:
                raise RuntimeError("Not an existing directory: %s" % path[0])

        repo = get_repo_instance()

        # TODO: Should we accept a pure annex and create a handle repo from it?
        if isinstance(repo, HandleRepo):
            repo.import_metadata(ImporterDict[format], files=path,
                                 about_uri=subject if subject is not None
                                 else DLNS.this)
        elif isinstance(repo, CollectionRepo):
            if handle is None:
                # collection level
                repo.import_metadata_collection(ImporterDict[format],
                                                files=path, about_uri=subject)
            else:
                repo.import_metadata_to_handle(ImporterDict[format], handle,
                                               files=path, about_uri=subject)

        # Update metadata of local master collection:
        local_master = get_datalad_master()

        if isinstance(repo, CollectionRepo):
            # update master if it is a registered collection:
            for c in local_master.git_get_remotes():
                if repo.path == local_master.git_get_remote_url(c):
                    local_master.git_fetch(c)
        elif isinstance(repo, HandleRepo):
            # update master if it is an installed handle:
            for h in local_master.get_handle_list():
                if repo.path == urlparse(
                        CollectionRepoHandleBackend(local_master, h).url).path:
                    local_master.import_metadata_to_handle(CustomImporter,
                                                           key=h,
                                                           files=opj(
                                                               repo.path,
                                                               HANDLE_META_DIR))

        # TODO: What to do in case of a handle, if it is part of another
        # locally available collection than just the master?

        if isinstance(repo, CollectionRepo):
            return Collection(CollectionRepoBackend(repo))
        elif isinstance(repo, HandleRepo):
            return Handle(HandleRepoBackend(repo))
Example #5
    def __call__(self, search):
        """
        Returns
        -------
        list of Collection
        """

        # TODO: currently returns the collection's uri instead of its path,
        # which may lead to DLNS.this being printed out.

        # TODO: since search-handle and search-collection only slightly differ,
        # build a search call, that's more general and both can use
        # This one should allow for searching for other entities as well

        local_master = get_datalad_master()

        metacollection = MetaCollection(
            [local_master.get_backend_from_branch(remote + "/master")
             for remote in local_master.git_get_remotes()] +
            [local_master.get_backend_from_branch()])

        # TODO: Bindings should be done in collection class:
        metacollection.conjunctive_graph.bind('dlns', DLNS)

        query_string = """SELECT ?g ?r {GRAPH ?g {?r rdf:type dlns:Collection .
                                             ?s ?p ?o .
                                             FILTER regex(?o, "%s")}}""" % \
                       search

        results = metacollection.conjunctive_graph.query(query_string)

        rows = [row.asdict() for row in results]
        collections = list()
        locations = list()
        for row in rows:
            collections.append(str(row['g']))
            parsed_uri = urlparse(row['r'])
            if parsed_uri.scheme == 'file':
                locations.append(parsed_uri.path)
            else:
                locations.append(str(row['r']))

        if collections:
            width = max(len(c) for c in collections)
            for c, l in zip(collections, locations):
                print("%s\t%s" % (c.ljust(width), l))

            return [Collection(CollectionRepoBackend(local_master, col + "/master"))
                    for col in collections]
        else:
            return []
Example #6
    def __call__(self):
        """
        Returns
        -------
        list of Collection
        """

        local_master = get_datalad_master()
        for collection in local_master.git_get_remotes():
            print(collection)

        return [Collection(CollectionRepoBackend(local_master,
                                                 branch=remote + "/master"))
                for remote in local_master.git_get_remotes()]
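
Example #14's doctest imports this functionality as list_collections from datalad.api, so (assuming that mapping) a usage sketch reduces to:

    # Hedged sketch: list_collections and the .name attribute follow the
    # doctest in Example #14.
    from datalad.api import list_collections

    for c in list_collections():
        print(c.name)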
Example #7
    def __call__(self, query, collections=None):
        """
        Returns
        -------
        rdflib.query.QueryResult
        """

        # TODO: sanity checks for the query;

        local_master = get_datalad_master()

        be_list = list()
        if collections == [] or collections is None:
            be_list.extend([local_master.get_backend_from_branch(remote +
                                                                 "/master")
                            for remote in local_master.git_get_remotes()])
            be_list.append(local_master.get_backend_from_branch())
        else:
            for c in collections:
                if c in local_master.git_get_remotes():
                    be_list.append(local_master.get_backend_from_branch(
                        c + "/master"))
                elif c == local_master.name:
                    be_list.append(local_master.get_backend_from_branch())
                else:
                    raise RuntimeError("Collection '%s' unknown. Canceled." % c)

        m_clt = MetaCollection(be_list)

        # TODO: move following prefix bindings
        for g in m_clt.store.contexts():
            if g == m_clt.conjunctive_graph:
                # is this even possible?
                continue
            for prefix, ns in g.namespaces():
                m_clt.conjunctive_graph.bind(prefix, ns)

        # the actual query:
        results = m_clt.conjunctive_graph.query(query)

        for row in results:
            out = ""
            for col in row:
                out += "\t%s" % col
            out = out.lstrip('\t')
            print(out)

        return results
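
The query argument is handed directly to the conjunctive graph's query() call, so any SPARQL SELECT over the bound prefixes works. A sketch of a query string to pass as the query parameter, modelled on the one built in Example #5 (the rdf/dlns prefix bindings are assumed to be in place after the loop above; "demo" is only a placeholder search term):

    # Hedged sketch: a SPARQL SELECT in the same shape as Example #5's query.
    query = """SELECT ?g ?r {GRAPH ?g {?r rdf:type dlns:Collection .
                                       ?s ?p ?o .
                                       FILTER regex(?o, "demo")}}"""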
Example #8
    def __call__(self, path=curdir, name=None):
        """
        Returns
        -------
        Handle
        """

        local_master = get_datalad_master()

        new_handle = HandleRepo(abspath(expandvars(expanduser(path))),
                                name=name, create=True)
        local_master.add_handle(new_handle, name=name)
        # TODO: get metadata, in case there is some already.
        # This implicates the option to use create-handle on an existing annex.

        return Handle(HandleRepoBackend(new_handle))
Example #9
    def __call__(self, handle):

        # TODO: unifying: also accept path to handle

        local_master = get_datalad_master()

        if handle not in local_master.get_handle_list():
            raise ValueError("Handle '%s' unknown." % handle)

        # converting file-scheme url to local path:
        path = urlparse(CollectionRepoHandleBackend(local_master,
                                                    handle).url).path
        try:
            rmtree(path)
        except OSError as e:
            lgr.warning("Couldn't delete %s:\n%s" % (path, str(e)))

        local_master.remove_handle(handle)
Example #10
    def __call__(self, handle=curdir, upgrade_data=False):
        """
        Returns
        -------
        Handle
        """

        local_master = get_datalad_master()

        if exists(handle):
            repo = get_repo_instance(handle, HandleRepo)
        elif handle in local_master.get_handle_list():
            repo = get_repo_instance(CollectionRepoHandleBackend(local_master,
                                                                 handle).url)
        else:
            lgr.error("Unknown handle '%s'." % handle)
            raise RuntimeError("Unknown handle '%s'." % handle)

        remotes = repo.git_get_remotes()
        if not remotes:
            raise RuntimeError("No remotes were found for %s. Cannot upgrade"
                               % repo.path)

        # TODO: it might be arbitrary other remote, not necessarily origin
        # That information is stored in git/.config -- use it
        upgrade_remote = 'origin'
        if upgrade_remote not in remotes:
            raise RuntimeError("No remote %r found to upgrade from. Known remotes: %s"
                               % (upgrade_remote, ', '.join(remotes)))

        if upgrade_data:
            # what files do we currently have?
            files_to_upgrade = [f for f in repo.get_indexed_files()
                                if repo.file_has_content(f)]

        # upgrade it:
        repo.git_pull(upgrade_remote)

        if upgrade_data:
            # upgrade content:
            repo.get(files_to_upgrade)

        return Handle(HandleRepoBackend(repo))
Example #11
    def __call__(self, key):
        """
        Returns
        -------
        str
        """

        local_master = get_datalad_master()

        if key in local_master.git_get_remotes():
            location = CollectionRepoBackend(local_master, key).url
        elif key in local_master.get_handle_list():
            location = CollectionRepoHandleBackend(local_master, key).url
        else:
            lgr.error("Unknown name '%s'" % key)

        result = urlparse(location).path
        print(result)
        return result
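
Example #15's doctest calls this as whereis("forrest_gump") via datalad.api, so (assuming that mapping) the command resolves a registered collection or handle name to its local path:

    # Hedged sketch: whereis per the import in Example #15.
    from datalad.api import whereis

    path = whereis("forrest_gump")  # prints and returns the local path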
Example #12
    def __call__(self, path=curdir, name=None):
        # TODO: Collection => graph => lazy
        """
        Returns
        -------
        Collection
        """

        local_master = get_datalad_master()

        # create the collection:
        new_collection = CollectionRepo(abspath(expandvars(expanduser(path))),
                                        name=name, create=True)
        # TODO: Move the abspath conversion to a constraint!
        # Additionally (or instead?) check for validity: existing directory or
        # just non-existing.

        # register with local master:
        local_master.git_remote_add(new_collection.name, new_collection.path)
        local_master.git_fetch(new_collection.name)

        return Collection(CollectionRepoBackend(new_collection))
Example #13
    def __call__(self, target, collection=curdir, baseurl=None,
                 remote_name=None):
        """
        Returns
        -------
        Collection
        """

        # TODO: Note: Yarik's git mtheirs for publishing branches!

        local_master = get_datalad_master()

        if isdir(abspath(expandvars(expanduser(collection)))):
            c_path = abspath(expandvars(expanduser(collection)))
        elif collection in local_master.git_get_remotes():
            c_path = urlparse(local_master.git_get_remote_url(collection)).path
            if not isdir(c_path):
                raise RuntimeError("Invalid path to collection '%s':\n%s" %
                                   (collection, c_path))
        else:
            raise RuntimeError("Unknown collection '%s'." % collection)

        local_collection_repo = get_repo_instance(
            abspath(expandvars(expanduser(c_path))), CollectionRepo)

        available_handles = [key for key in
                             local_collection_repo.get_handle_list()
                             if exists(urlparse(CollectionRepoHandleBackend(
                                 local_collection_repo, key).url).path)]

        parsed_target = urlparse(target)  # => scheme, path

        from pkg_resources import resource_filename
        prepare_script_path = \
            resource_filename('datalad',
                              'resources/sshserver_prepare_for_publish.sh')
        cleanup_script_path = \
            resource_filename('datalad',
                              'resources/sshserver_cleanup_after_publish.sh')

        from ..cmd import Runner
        runner = Runner()
        if parsed_target.scheme == 'ssh':
            if parsed_target.netloc == '':
                raise RuntimeError("Invalid ssh address: %s" % target)

            if baseurl is None:
                baseurl = target
            collection_url = baseurl + '/' + local_collection_repo.name + \
                             ".datalad-collection"

            # build control master:
            from datalad.utils import assure_dir
            var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
            assure_dir(var_run_user_datalad)
            control_path = "%s/%s" % (var_run_user_datalad, parsed_target.netloc)
            control_path += ":%s" % parsed_target.port if parsed_target.port else ""

            # start controlmaster:

            cmd_str = "ssh -o \"ControlMaster=yes\" -o \"ControlPath=%s\" " \
                      "-o \"ControlPersist=yes\" %s exit" % \
                      (control_path, parsed_target.netloc)
            lgr.error("DEBUG: %s" % cmd_str)
            import subprocess
            proc = subprocess.Popen(cmd_str, shell=True)
            proc.communicate(input="\n")  # why the f.. this is necessary?

            # prepare target repositories:

            script_options = "%s %s.datalad-collection" % (parsed_target.path,
                                                    local_collection_repo.name)
            for key in available_handles:
                # prepare repos for locally available handles only
                script_options += " %s" % key

            cmd_str = "ssh -S %s %s \'cat | sh /dev/stdin\' %s" % \
                      (control_path, parsed_target.netloc, script_options)
            cmd_str += " < %s" % prepare_script_path
            try:
                out, err = runner.run(cmd_str)
            except CommandError as e:
                lgr.error("Preparation script failed: %s" % str(e))
                out = e.stdout
                err = e.stderr

            # set GIT-SSH:
            environ['GIT_SSH'] = resource_filename('datalad',
                                                   'resources/git_ssh.sh')

        elif parsed_target.scheme == 'file' or parsed_target.scheme == '':
            # we should have a local target path
            if not isdir(abspath(expandvars(expanduser(parsed_target.path)))):
                raise RuntimeError("%s doesn't exist." % parsed_target.path)

            target_path = abspath(expandvars(expanduser(parsed_target.path)))
            if baseurl is None:
                baseurl = target_path
            collection_url = baseurl + '/' + local_collection_repo.name + \
                             ".datalad-collection"

            try:
                out, err = runner.run(["sh", prepare_script_path,
                                       target_path,
                                       local_collection_repo.name +
                                       ".datalad-collection"]
                                      + available_handles)
            except CommandError as e:
                lgr.error("Preparation script failed: %s" % str(e))
                out = e.stdout
                err = e.stderr

        else:
            raise RuntimeError("Don't know scheme '%s'." %
                               parsed_target.scheme)

        # check output:
        results = parse_script_output(out, err)

        script_failed = False
        for name in available_handles + \
                [local_collection_repo.name + ".datalad-collection"]:
            if not results[name]['init']:
                lgr.error("Server setup for %s failed." % name)
                script_failed = True
        # exit here, if something went wrong:
        if script_failed:
            raise RuntimeError("Server setup failed.")

        # Now, all the handles:
        from .publish_handle import PublishHandle
        handle_publisher = PublishHandle()
        for handle_name in available_handles:

            # get location:
            handle_loc = urlparse(CollectionRepoHandleBackend(
                local_collection_repo, handle_name).url).path
            # raise exception if there's no handle at that location:
            try:
                handle_repo = get_repo_instance(handle_loc, HandleRepo)
            except RuntimeError as e:
                lgr.error("'%s': No handle available at %s. Skip." %
                          (handle_name, handle_loc))
                continue

            annex_ssh = "-S %s" % control_path \
                if parsed_target.scheme == 'ssh' else None
            handle_publisher(None, handle=handle_loc,
                             url=baseurl + '/' + handle_name,
                             ssh_options=annex_ssh)

        # TODO: check success => go on with collection

        # prepare publish branch in local collection:
        # check for existing publish branches:
        from random import choice
        from string import ascii_letters
        from six.moves import xrange
        p_branch = "publish_" + ''.join(choice(ascii_letters) for i in xrange(6))
        local_collection_repo.git_checkout(p_branch, '-b')

        importer = CustomImporter('Collection', 'Collection', DLNS.this)
        importer.import_data(local_collection_repo.path)
        graphs = importer.get_graphs()
        orig_uri = graphs[REPO_STD_META_FILE[0:-4]].value(predicate=RDF.type,
                                                          object=DLNS.Collection)

        # correct collection uri
        new_uri = URIRef(collection_url)
        for graph_name in graphs:
            for p, o in graphs[graph_name].predicate_objects(subject=orig_uri):
                graphs[graph_name].remove((orig_uri, p, o))
                graphs[graph_name].add((new_uri, p, o))

        # correct handle uris in hasPart statements:
        replacements = []
        from datalad.support.collection import Collection
        from datalad.support.collectionrepo import CollectionRepoBackend
        col_meta = Collection(CollectionRepoBackend(local_collection_repo))
        for o in graphs[REPO_STD_META_FILE[0:-4]].objects(subject=new_uri,
                                                          predicate=DCTERMS.hasPart):
            from os.path import basename
            path = urlparse(o).path
            if exists(path):
                # local handle
                # retrieve name for that uri:
                # Note: That's an experimental implementation
                hdl_name = None
                for key in col_meta:
                    if urlparse(col_meta[key].url).path == path:
                        hdl_name = col_meta[key].name
                if hdl_name is None:
                    raise RuntimeError("No handle found for path '%s'." % path)

                o_new = URIRef(baseurl + '/' + hdl_name)
                # replacements for collection level:
                replacements.append((o, o_new))

                # replace in collection's handle storage:
                hdl_dir = opj(local_collection_repo.path,
                              local_collection_repo._key2filename(hdl_name))
                hdl_importer = CustomImporter('Collection', 'Handle', o)
                hdl_importer.import_data(hdl_dir)
                hdl_graphs = hdl_importer.get_graphs()
                for g in hdl_graphs:
                    for pre, obj in hdl_graphs[g].predicate_objects(o):
                        hdl_graphs[g].remove((o, pre, obj))
                        hdl_graphs[g].add((o_new, pre, obj))
                hdl_importer.store_data(hdl_dir)
                local_collection_repo.git_add(hdl_dir)

            else:
                # We have a locally not available handle
                # in that collection, that therefore can't be published.
                # Just skip for now and assume uri simply doesn't change.
                continue
        for o, o_new in replacements:
            graphs[REPO_STD_META_FILE[0:-4]].remove((new_uri, DCTERMS.hasPart, o))
            graphs[REPO_STD_META_FILE[0:-4]].add((new_uri, DCTERMS.hasPart, o_new))

        # TODO: add commit reference?

        importer.store_data(local_collection_repo.path)
        for graph_name in graphs:
            local_collection_repo.git_add(graph_name + '.ttl')
        local_collection_repo.git_commit("metadata prepared for publishing")

        # add as remote to local:
        # TODO: Better remote name?
        if remote_name is None:
            remote_name = p_branch
        local_collection_repo.git_remote_add(remote_name, collection_url)

        # push local branch "publish" to remote branch "master"
        # we want to push to master, so a different branch has to be checked
        # out in target; in general we can't explicitly allow for the local
        # repo to push
        local_collection_repo.git_push("%s +%s:master" % (remote_name, p_branch))

        # checkout master in local collection:
        local_collection_repo.git_checkout("master")

        # checkout master in published collection:
        if parsed_target.scheme == 'ssh':
            cmd_str = "ssh -S %s %s \'cat | sh /dev/stdin\' %s" % \
                      (control_path, parsed_target.netloc, script_options)
            cmd_str += " < %s" % cleanup_script_path
            try:
                out, err = runner.run(cmd_str)
            except CommandError as e:
                lgr.error("Clean-up script failed: %s" % str(e))

            # stop controlmaster:
            cmd_str = "ssh -O stop -S %s %s" % (control_path,
                                                parsed_target.netloc)
            try:
                out, err = runner.run(cmd_str)
            except CommandError as e:
                lgr.error("Stopping ssh control master failed: %s" % str(e))

        else:
            try:
                out, err = runner.run(["sh", cleanup_script_path,
                                       target_path,
                                       local_collection_repo.name +
                                       ".datalad-collection"]
                                      + available_handles)
            except CommandError as e:
                lgr.error("Clean-up script failed: %s" % str(e))

        # TODO: final check, whether everything is fine
        # Delete publish branch:
        local_collection_repo._git_custom_command('', 'git branch -D %s'
                                                  % p_branch)

        return Collection(CollectionRepoBackend(local_collection_repo,
                                                remote_name + "/master"))
Example #14
    def __call__(self, url, name=None):
        # TODO: After publishing new demo collection, adapt doctest
        """
        Examples
        --------
        >>> from datalad.api import register_collection, list_collections
        >>> def test_register_collection_simple():
        ...     assert("DATALAD_COL_demo_collection" not in [c.name for c in list_collections()])
        ...     col = register_collection("http://collections.datalad.org/demo/DATALAD_COL_demo_collection")
        ...     assert(col.name in [c.name for c in list_collections()])
        ...     assert(col.url == "http://collections.datalad.org/demo/DATALAD_COL_demo_collection/.git")
        ...     assert(col.name == "DATALAD_COL_demo_collection")

        Returns
        -------
        Collection
        """

        local_master = get_datalad_master()

        # check whether url is a local path:
        if isdir(abspath(expandvars(expanduser(url)))):
            url = abspath(expandvars(expanduser(url)))
            # raise exception, if it's not a valid collection:
            repo = CollectionRepo(url, create=False)
            if name is None:
                name = repo.name

        else:
            # Try to auto complete collection's url:
            url += '/' if not url.endswith('/') else ''
            url_completions = [url,
                               url + '.git',
                               url + url.rstrip('/').split('/')[-1] +
                               '.datalad-collection/.git']

            url_ok = False
            for address in url_completions:
                try:
                    # use ls-remote to verify git can talk to that repository:
                    local_master.git_ls_remote(address, "-h")
                    url = address
                    url_ok = True
                    break
                except CommandError as e:
                    if re.match("fatal.+?%s.+?not found" % url, e.stderr):
                        continue
                    else:
                        lgr.error("Registering collection failed.\n%s" % e)
                        return

            if not url_ok:
                lgr.error("Registering collection failed. "
                          "Couldn't find remote repository.")
                return

            if name is None:
                # derive name from url:
                parts = url.rstrip('/').split('/')
                if parts[-1] == '.git':
                    name = parts[-2]
                elif parts[-1].endswith('.git'):
                    name = parts[-1][0:-4]
                elif parts[-1].endswith('.datalad-collection'):
                    name = parts[-1][0:-19]
                else:
                    name = parts[-1]

        local_master.git_remote_add(name, url)
        local_master.git_fetch(name)

        return Collection(CollectionRepoBackend(local_master,
                                                name + "/master"))
Example #15
    def __call__(self, handle, path=None, name=None):
        """
        Examples
        --------
        >>> import os
        >>> from datalad.api import install_handle, list_handles, whereis
        >>> from datalad.utils import getpwd
        >>> def test_install_handle_simple():
        ...     assert("forrest_gump" not in [h.name for h in list_handles()])
        ...     handle = install_handle("http://psydata.ovgu.de/forrest_gump/.git")
        ...     assert(os.path.exists(os.path.join(getpwd(), 'forrest_gump', '.git', 'annex')))
        ...     assert(handle.name == "forrest_gump")
        ...     assert(handle.name in [h.name for h in list_handles()])
        ...     assert(os.path.join(getpwd(), 'forrest_gump') == whereis("forrest_gump"))

        Returns
        -------
        Handle
        """
        # TODO: doctest apparently detected by nose and passed, but doesn't
        # seem to actually be executed yet.

        local_master = get_datalad_master()

        # check whether 'handle' is a key ("{collection}/{handle}")
        # or a local path or an url:
        parts = handle.split("/")
        if parts[0] == local_master.name:
            # addressing a handle, that is part of local master collection
            # Note: Theoretically, we could allow for this if a new name is
            # given.
            raise ValueError("Installing handles from collection '%s' doesn't " "make sense." % local_master.name)

        name_prefix = None
        handle_name = None
        if parts[0] in local_master.git_get_remotes() and len(parts) >= 2:
            # 'handle' starts with a name of a known collection, followed by at
            # least a second part, separated by '/'.
            # Therefore assuming it's a handle's key, not an url

            handle_name = handle[len(parts[0]) + 1 :]
            url = CollectionRepoHandleBackend(repo=local_master, key=handle_name, branch=parts[0] + "/master").url
            name_prefix = parts[0] + "/"

            # TODO: Where to determine whether the handle even exists?
            # May be use Collection instead and check for "hasPart"->url
            # Note: Actually CollectionRepoHandleBackend should raise an
            # exception!
        elif isdir(abspath(expandvars(expanduser(handle)))):
            # appears to be a local path
            url = abspath(expandvars(expanduser(handle)))
        else:
            # assume it's an url:
            # TODO: Further checks needed? May be at least check for spaces and
            # ';' to avoid injection?
            url = handle

        if not path:
            if exists(url):
                # could well be a local path
                subdir = basename(url)
            else:
                # proper url -- could be a bit more evolved
                subdir = get_url_straight_filename(url, allowdir=True, strip=[".git"])
            install_path = opj(getpwd(), subdir)
        else:
            install_path = abspath(expandvars(expanduser(path)))

        # TODO:  name might be in conflict -- already have a handle with that name
        # More difficult especially if installed_handle.name to be taken!
        # It should fail as early as possible, i.e. without actually calling
        # HandleRepo(create=True) first, but we wouldn't know the name unless
        # we sense remotely!!! TODO
        known_handles = local_master.get_handle_list()
        if name and name in known_handles:
            epath = CollectionRepoHandleBackend(local_master, name).url
            if epath.startswith("file://"):
                epath = get_url_path(epath)
            if epath != install_path:
                raise ValueError(
                    "Handle %s is already known and already installed under "
                    "different path %s. Specify another name" % (name, epath)
                )

        if exists(install_path):
            # try to overlay without any creation/init
            try:
                installed_handle = HandleRepo(install_path, create=False)
            except Exception:
                raise RuntimeError("%s already exists, and is not a handle"
                                   % install_path)

            if name and name != installed_handle.name:
                raise ValueError(
                    "Different handle (%s) is already installed under %s" % (installed_handle.name, install_path)
                )
        else:
            # install the handle:
            installed_handle = HandleRepo(install_path, url, create=True)

        local_name = name or handle_name or installed_handle.name
        if name_prefix is not None:
            # TODO: Yarik is asking why?  how would we later decipher which one is local and which one remote????
            # Ben is answering: Why do we need to? If installed we want to use
            # the local handle instead of the remote one when addressing it in
            # datalad command, don't we? If we install
            # "somecollection/Myhandle" and later use a datalad command with
            # "somecollection/Myhandle" this should lead to the installed one,
            # I think.
            local_name = name_prefix + local_name

        # "register" handle only if not yet known
        if local_name not in known_handles:
            local_master.add_handle(installed_handle, name=local_name)

        # Import metadata of the handle, if there's any.
        # Priorities: First try to get metadata from the handle itself,
        # if there's none, then get whatever is stored in the collection it was
        # installed from.
        # TODO: Discuss this approach. May be it makes more sense to always use
        # the metadata from the collection, if the handle was installed that
        # way.

        if exists(opj(installed_handle.path, HANDLE_META_DIR, REPO_STD_META_FILE)):
            local_master.import_metadata_to_handle(
                CustomImporter, key=local_name, files=opj(installed_handle.path, HANDLE_META_DIR)
            )
        elif name_prefix is not None:
            # installed from  collection
            # get the metadata from that remote collection:
            metadata = dict()
            files = [f for f in local_master.git_get_files(name_prefix + "master") if f.startswith(handle_name)]
            for file_ in files:
                metadata[file_[len(handle_name) + 1 :]] = local_master.git_get_file_content(
                    file_, name_prefix + "master"
                )
            local_master.import_metadata_to_handle(CustomImporter, key=local_name, data=metadata)

        return Handle(HandleRepoBackend(installed_handle))
Example #16
    def __call__(
        self,
        subject=None,
        author=None,
        author_orcid=None,
        author_email=None,
        author_page=None,
        license=None,
        description=None,
    ):
        """
        Returns
        -------
        Handle or Collection
        """
        repo = get_repo_instance()

        if isinstance(repo, CollectionRepo):
            target_class = "Collection"
            if subject in [repo.name, None]:
                about_class = "Collection"
                about_uri = DLNS.this
                files = repo.path
            elif subject in repo.get_handle_list():
                about_class = "Handle"
                about_uri = URIRef(CollectionRepoHandleBackend(repo, subject).url)
                files = opj(repo.path, repo._key2filename(subject))
            else:
                # TODO: look for internal entities as subject
                lgr.error("Subject '%s' unknwon." % subject)
                raise RuntimeError("Subject '%s' unknwon." % subject)
        elif isinstance(repo, HandleRepo):
            target_class = "Handle"
            if subject in [repo.name, None]:
                about_class = "Handle"
                about_uri = DLNS.this
                files = opj(repo.path, HANDLE_META_DIR)
            else:
                # TODO: look for internal entities as subject
                lgr.error("Subject '%s' unknwon." % subject)
                raise RuntimeError("Subject '%s' unknwon." % subject)
        else:
            lgr.error("Don't know how to handle object of class %s" % repo.__class__)
            raise RuntimeError("Don't know how to handle object of class %s" % repo.__class__)

        importer = CustomImporter(target_class=target_class, about_class=about_class, about_uri=about_uri)
        # read existing metadata:
        importer.import_data(files)
        graph = importer.get_graphs()[REPO_STD_META_FILE[0:-4]]

        if about_uri not in graph.all_nodes():
            # TODO: When arbitrary entities are allowed, this has to change.
            raise RuntimeError("Didn't find URI '%s' in datalad graph." % about_uri)

        # add the metadata:

        # create author node in graph;
        # choose most unique identifier available:
        a_node = None
        if author_orcid is not None:
            a_node = URIRef(author_orcid)
        elif author_email is not None:
            a_node = URIRef("mailto:" + author_email)
        elif author_page is not None:
            a_node = URIRef(author_page)
        elif author is not None:
            a_node = EMP.author

        # assign author's properties:
        if a_node is not None:
            graph.add((about_uri, PAV.createdBy, a_node))
            graph.add((a_node, RDF.type, PROV.Person))
            graph.add((a_node, RDF.type, FOAF.Person))

            if author_email is not None:
                graph.add((a_node, FOAF.mbox, URIRef("mailto:" + author_email)))
            if author_page is not None:
                graph.add((a_node, FOAF.homepage, URIRef(author_page)))
            if author is not None:
                graph.add((a_node, FOAF.name, Literal(author)))

        if license is not None:
            if isfile(license):
                with open(license, "r") as f:
                    l_content = f.readlines()
                graph.add((about_uri, DCTERMS.license, Literal("".join(l_content))))
            # TODO: possible URL, dictionary of known URLs
            else:
                graph.add((about_uri, DCTERMS.license, Literal(license)))

        if description is not None:
            if isfile(description):
                with open(description, "r") as f:
                    d_content = f.readlines()
                graph.add((about_uri, DCTERMS.description, Literal("".join(d_content))))
            else:
                graph.add((about_uri, DCTERMS.description, Literal(description)))

        # save:
        importer.store_data(files)
        if isinstance(repo, HandleRepo):
            repo.add_to_git(files, "Metadata changed.")
        elif isinstance(repo, CollectionRepo):
            repo.git_add([f for f in listdir(repo.path) if f.endswith(".ttl")])
            repo.git_commit("Metadata changed.")

        # Update metadata of local master collection:
        local_master = get_datalad_master()

        if isinstance(repo, CollectionRepo):
            # update master if it is a registered collection:
            for c in local_master.git_get_remotes():
                if repo.path == local_master.git_get_remote_url(c):
                    local_master.git_fetch(c)
        elif isinstance(repo, HandleRepo):
            # update master if it is an installed handle:
            for h in local_master.get_handle_list():
                if repo.path == urlparse(CollectionRepoHandleBackend(local_master, h).url).path:
                    local_master.import_metadata_to_handle(CustomImporter, key=h, files=opj(repo.path, HANDLE_META_DIR))

        # TODO: What to do in case of a handle, if it is part of another
        # locally available collection than just the master?

        if isinstance(repo, CollectionRepo):
            return Collection(CollectionRepoBackend(repo))
        elif isinstance(repo, HandleRepo):
            return Handle(HandleRepoBackend(repo))
Example #17
    def __call__(self, name):

        local_master = get_datalad_master()
        local_master.git_remote_remove(name)