Example #1
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    Parameters
    ----------
    archive: str
      Path to the archive to decompress.
    dir_: str
      Directory to extract the archive into.
    leading_directories: {'strip', None}
      If 'strip' and the archive contains a single leading directory under
      which all content is stored, all content is moved one directory up
      and that leading directory is removed.
    """
    if not exists(dir_):
        lgr.debug("Creating directory %s to extract archive into", dir_)
        os.makedirs(dir_)

    _decompress_file(archive, dir_)

    if leading_directories == 'strip':
        _, dirs, files = next(os.walk(dir_))
        if not files and len(dirs) == 1:
            # move all the content under dirs[0] up one level
            widow_dir = opj(dir_, dirs[0])
            lgr.debug("Moving content of %s up one level", widow_dir)
            subdir, subdirs_, files_ = next(os.walk(widow_dir))
            for f in subdirs_ + files_:
                os.rename(opj(subdir, f), opj(dir_, f))
            # NFS might still hold files open, so use rmtree, which retries
            # a few times
            rmtree(widow_dir)
    elif leading_directories is None:
        pass  # really do nothing
    else:
        raise NotImplementedError("Unsupported leading_directories value: %s"
                                  % leading_directories)
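
A minimal usage sketch (hypothetical file and directory names): extracting a tarball whose content is wrapped in a single 'project-1.0/' directory leaves the files directly under the target directory:

# 'project-1.0.tar.gz' stores everything under one 'project-1.0/' directory,
# which leading_directories='strip' removes after extraction
decompress_file('project-1.0.tar.gz', '/tmp/out', leading_directories='strip')
# /tmp/out now holds the archive's files directly, without 'project-1.0/'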
Example #2
def test_sibling_inherit_no_super_remote(basedir):
    ds_source = Dataset(opj(basedir, "source")).create()
    ds_super = Dataset(opj(basedir, "super")).create()
    ds_clone = ds_super.clone(
        source=ds_source.path, path="clone")
    # Adding a sibling with inherit=True doesn't crash when the superdataset
    # doesn't have a remote with the given name.
    ds_clone.siblings(action="add", name="donotexist", inherit=True,
                      url=ds_source.path, result_renderer=None)
Example #3
    def __init__(self, toppath=None, persistent=False):

        self._toppath = toppath
        if toppath:
            path = opj(toppath, ARCHIVES_TEMP_DIR)
            if not persistent:
                tempsuffix = "-" + _get_random_id()
                lgr.debug("For non-persistent archives using %s suffix for path %s",
                          tempsuffix, path)
                path += tempsuffix
        else:
            if persistent:
                raise ValueError("%s cannot be persistent since no toppath was provided" % self)
            # mktemp only generates a name; the directory is created below
            path = tempfile.mktemp(**get_tempfile_kwargs())
        self._path = path
        self.persistent = persistent
        # TODO?  ensure that it is absent or we should allow for it to persist a bit?
        #if exists(path):
        #    self._clean_cache()
        self._archives = {}

        # TODO: begging for a race condition
        if not exists(path):
            lgr.debug("Initiating clean cache for the archives under %s",
                      self.path)
            try:
                self._made_path = True
                os.makedirs(path)
                lgr.debug("Cache initialized")
            except Exception:
                lgr.error("Failed to initialize cache under %s", path)
                raise
        else:
            lgr.debug("Not initiating existing cache for the archives under %s",
                      self.path)
            self._made_path = False
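
The TODO above flags a time-of-check/time-of-use race: two processes can both find the path absent, and the second os.makedirs call will then fail. A minimal race-free sketch (an illustration, not DataLad's actual code; note it no longer records whether this process created the directory, which the _made_path flag tracks):

import os

def ensure_cache_dir(path):
    # exist_ok=True lets the OS arbitrate concurrent creators, closing
    # the window between the exists() check and makedirs()
    os.makedirs(path, exist_ok=True)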
Example #4
    def get_extracted_files(self):
        """Generator to provide filenames which are available under the extracted archive
        """
        path = self.assure_extracted()
        path_len = len(path) + (len(os.sep) if not path.endswith(os.sep) else 0)
        for root, dirs, files in os.walk(path):  # TEMP
            for name in files:
                yield ensure_unicode(opj(root, name)[path_len:])
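
An equivalent sketch using os.path.relpath instead of the manual prefix slicing above (a hypothetical standalone helper, not part of the class):

import os

def iter_extracted_files(path):
    # yield names relative to `path`, matching the slicing-based version
    for root, _dirs, files in os.walk(path):
        for name in files:
            yield os.path.relpath(os.path.join(root, name), path)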
Example #5
def test_sibling_inherit(basedir):
    ds_source = Dataset(opj(basedir, "source")).create()

    # In superdataset, set up remote "source" that has git-annex group "grp".
    ds_super = Dataset(opj(basedir, "super")).create()
    ds_super.siblings(action="add", name="source", url=ds_source.path,
                      annex_group="grp", result_renderer=None)

    ds_clone = ds_super.clone(
        source=ds_source.path, path="clone")
    # In a subdataset, adding a "source" sibling with inherit=True pulls in
    # that configuration.
    ds_clone.siblings(action="add", name="source", url=ds_source.path,
                      inherit=True, result_renderer=None)
    res = ds_clone.siblings(action="query", name="source",
                            result_renderer=None)
    eq_(res[0]["annex-group"], "grp")
Example #6
    def get_archive(self, archive):
        archive = self._get_normalized_archive_path(archive)

        if archive not in self._archives:
            self._archives[archive] = \
                ExtractedArchive(archive,
                                 opj(self.path, _get_cached_filename(archive)),
                                 persistent=self.persistent)

        return self._archives[archive]
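
A note on the pattern: dict.setdefault would be shorter, but it constructs the ExtractedArchive eagerly on every call, so the explicit membership check avoids needless object creation. The same lazy-memoization shape as a generic sketch:

def get_cached(cache, key, factory):
    # construct the value only on a cache miss
    if key not in cache:
        cache[key] = factory(key)
    return cache[key]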
Example #7
def test_sibling_path_is_posix(basedir=None, otherpath=None):
    ds_source = Dataset(opj(basedir, "source")).create()
    # add remote with system native path
    ds_source.siblings(action="add",
                       name="donotexist",
                       url=otherpath,
                       result_renderer='disabled')
    res = ds_source.siblings(action="query",
                             name="donotexist",
                             result_renderer='disabled',
                             return_type='item-or-list')
    # path URL should come out POSIX as if `git clone` had configured it for origin
    # https://github.com/datalad/datalad/issues/3972
    eq_(res['url'], Path(otherpath).as_posix())
Example #8
    def get_leading_directory(self, depth=None, consider=None, exclude=None):
        """Return leading directory of the content within archive

        Parameters
        ----------
        depth: int or None, optional
          Maximal depth of leading directories to consider. If None, there is
          no upper limit.
        consider: list of str, optional
          Regular expressions for file/directory names to be considered
          (applied before `exclude`). Matched against the entire relative path
          of the file within the archive.
        exclude: list of str, optional
          Regular expressions for file/directory names to be excluded from
          consideration. Matched against the entire relative path of the file
          within the archive.

        Returns
        -------
        str or None:
          The leading directory, or None if there is no single leading
          directory.
        """
        leading = None
        # get_extracted_files yields only files, so there is no need to check
        # whether an entry is a directory
        for fpath in self.get_extracted_files():
            if consider and not any_re_search(consider, fpath):
                continue
            if exclude and any_re_search(exclude, fpath):
                continue

            lpath = fpath.split(opsep)
            dpath = lpath[:-1]  # directory path components
            if leading is None:
                leading = dpath if depth is None else dpath[:depth]
            else:
                if dpath[:len(leading)] != leading:
                    # find smallest common path
                    leading_ = []
                    # TODO: there might be more efficient pythonic way
                    for d1, d2 in zip(leading, dpath):
                        if d1 != d2:
                            break
                        leading_.append(d1)
                    leading = leading_
            if not leading:
                # no common leading - ready to exit
                return None
        return leading if leading is None else opj(*leading)
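
The prefix-shrinking loop above can also be expressed with the standard library. A simplified sketch that ignores depth, consider, and exclude, using os.path.commonpath:

import os.path

def common_leading_dir(relpaths):
    # directories containing each file; '' means a file sits at the top level
    dirs = {os.path.dirname(p) for p in relpaths}
    if not dirs or '' in dirs:
        return None  # no files, or a file with no leading directory
    common = os.path.commonpath(dirs)
    return common or None  # commonpath yields '' when nothing is shared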
Example #9
    def _get_normalized_archive_path(self, archive):
        """Return the full path to the archive

        This gives consistent operation from different subdirectories,
        while referencing archives relative to the topdir.

        TODO: why do we need it???
        """
        if not isabs(archive) and self._toppath:
            out = normpath(opj(self._toppath, archive))
            if relpath(out, self._toppath).startswith(pardir):
                raise RuntimeError("%s points outside of the topdir %s" %
                                   (archive, self._toppath))
            if isdir(out):
                raise RuntimeError("%s is a directory, not an archive" % out)
            return out
        return archive
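
The relpath/pardir containment test above can also be written with pathlib; a sketch of the same check (an illustration, not DataLad's code):

from pathlib import Path

def is_under_topdir(path, topdir):
    # resolve() normalizes '..' components; relative_to() raises ValueError
    # when the resolved path escapes topdir
    try:
        Path(topdir, path).resolve().relative_to(Path(topdir).resolve())
        return True
    except ValueError:
        return False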
Example #10
    def get_extracted_filename(self, afile):
        """Return the full path to `afile` within the extracted archive

        This does not actually extract the archive.
        """
        return opj(self.path, afile)
Example #11
def test_siblings(origin, repo_path, local_clone_path):

    sshurl = "ssh://push-remote.example.com"
    httpurl1 = "http://remote1.example.com/location"
    httpurl2 = "http://remote2.example.com/location"

    # insufficient arguments
    # we need a dataset to work at
    with chpwd(repo_path):  # not yet there
        assert_raises(InsufficientArgumentsError,
                      siblings, 'add', url=httpurl1)

    # prepare src
    source = install(repo_path, source=origin, recursive=True)
    # pollute config
    depvar = 'remote.test-remote.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')

    # cannot configure unknown remotes as dependencies
    res = siblings(
        'configure',
        dataset=source,
        name="test-remote",
        url=httpurl1,
        publish_depends=['r1', 'r2'],
        on_failure='ignore',
        result_renderer=None)
    assert_status('error', res)
    eq_(res[0]['message'],
        ('unknown sibling(s) specified as publication dependency: %s',
         set(('r1', 'r2'))))
    # prior config was not changed by failed call above
    eq_(source.config.get(depvar, None), 'stupid')

    res = siblings('configure',
                   dataset=source, name="test-remote",
                   url=httpurl1,
                   result_xfm='paths',
                   result_renderer=None)

    eq_(res, [source.path])
    assert_in("test-remote", source.repo.get_remotes())
    eq_(httpurl1,
        source.repo.get_remote_url("test-remote"))

    # reconfiguring doesn't change anything
    siblings('configure', dataset=source, name="test-remote",
             url=httpurl1,
             result_renderer=None)
    assert_in("test-remote", source.repo.get_remotes())
    eq_(httpurl1,
        source.repo.get_remote_url("test-remote"))
    # re-adding doesn't work
    res = siblings('add', dataset=source, name="test-remote",
                   url=httpurl1, on_failure='ignore',
                   result_renderer=None)
    assert_status('error', res)
    # only after removal
    res = siblings('remove', dataset=source, name="test-remote",
                   result_renderer=None)
    assert_status('ok', res)
    assert_not_in("test-remote", source.repo.get_remotes())
    res = siblings('add', dataset=source, name="test-remote",
                   url=httpurl1, on_failure='ignore',
                   result_renderer=None)
    assert_status('ok', res)

    # add to another remote automagically taking it from the url
    # and being in the dataset directory
    with chpwd(source.path):
        res = siblings('add', url=httpurl2,
                       result_renderer=None)
    assert_result_count(
        res, 1,
        name="remote2.example.com", type='sibling')
    assert_in("remote2.example.com", source.repo.get_remotes())

    # don't fail with a conflicting url when using force:
    res = siblings('configure',
                   dataset=source, name="test-remote",
                   url=httpurl1 + "/elsewhere",
                   result_renderer=None)
    assert_status('ok', res)
    eq_(httpurl1 + "/elsewhere",
        source.repo.get_remote_url("test-remote"))

    # no longer a use case; it would need additional convincing that
    # this is useful for anything other than triple-checking other people's
    # errors. For an actual check use 'query'.
    # Maybe it could be turned into a set of warnings when `configure`
    # alters an existing setting, but then why call configure if you
    # want to keep the old values?
    #with assert_raises(RuntimeError) as cm:
    #    add_sibling(dataset=source, name="test-remote",
    #                url=httpurl1 + "/elsewhere")
    #assert_in("""'test-remote' already exists with conflicting settings""",
    #          str(cm.exception))
    ## add a push url without force fails, since in a way the fetch url is the
    ## configured push url, too, in that case:
    #with assert_raises(RuntimeError) as cm:
    #    add_sibling(dataset=source, name="test-remote",
    #                url=httpurl1 + "/elsewhere",
    #                pushurl=sshurl, force=False)
    #assert_in("""'test-remote' already exists with conflicting settings""",
    #          str(cm.exception))

    # add push url (force):
    res = siblings('configure',
                   dataset=source, name="test-remote",
                   url=httpurl1 + "/elsewhere",
                   pushurl=sshurl,
                   result_renderer=None)
    assert_status('ok', res)
    eq_(httpurl1 + "/elsewhere",
        source.repo.get_remote_url("test-remote"))
    eq_(sshurl,
        source.repo.get_remote_url("test-remote", push=True))

    # recursively:
    for r in siblings(
            'configure',
            dataset=source, name="test-remote",
            url=httpurl1 + "/%NAME",
            pushurl=sshurl + "/%NAME",
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote", repo.get_remotes())
        url = repo.get_remote_url("test-remote")
        pushurl = repo.get_remote_url("test-remote", push=True)
        ok_(url.startswith(httpurl1 + '/' + basename(source.path)))
        ok_(url.endswith(basename(repo.path)))
        ok_(pushurl.startswith(sshurl + '/' + basename(source.path)))
        ok_(pushurl.endswith(basename(repo.path)))
        eq_(url, r['url'])
        eq_(pushurl, r['pushurl'])

    # recursively without template:
    for r in siblings(
            'configure',
            dataset=source, name="test-remote-2",
            url=httpurl1,
            pushurl=sshurl,
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False,
            result_renderer=None):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote-2", repo.get_remotes())
        url = repo.get_remote_url("test-remote-2")
        pushurl = repo.get_remote_url("test-remote-2", push=True)
        ok_(url.startswith(httpurl1))
        ok_(pushurl.startswith(sshurl))
        # FIXME: the next condition used to compare the *Repo objects instead
        # of their paths. Due to missing annex-init in
        # datalad/tests/utils.py:clone_url this might not be the same, since
        # `source` actually is an annex, but after flavor 'clone' in
        # `with_testrepos` and then `install` any trace of an annex might be
        # gone in v5 (branch 'master' only), while in direct mode it still is
        # considered an annex. `repo` is forced to be a `GitRepo`, so we might
        # compare two objects of different classes while they actually are
        # pointing to the same repository.
        # See github issue #1854
        if repo.path != source.repo.path:
            ok_(url.endswith('/' + basename(repo.path)))
            ok_(pushurl.endswith(basename(repo.path)))
        eq_(url, r['url'])
        eq_(pushurl, r['pushurl'])

    # recursively without template and pushurl but full "hierarchy"
    # to a local clone
    for r in siblings(
            'configure',
            dataset=source,
            name="test-remote-3",
            url=local_clone_path,
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False,
            result_renderer=None):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote-3", repo.get_remotes())
        url = repo.get_remote_url("test-remote-3")
        pushurl = repo.get_remote_url("test-remote-3", push=True)

        eq_(normpath(url),
            normpath(opj(local_clone_path,
                         relpath(str(r['path']), source.path))))
        # https://github.com/datalad/datalad/issues/3951
        ok_(not pushurl)  # no pushurl should be defined