Example #1
0
    def __init__(self,
                 repo,
                 remote=None,
                 git_ssh=None,
                 pkey=None,
                 cache=None,  # pylint: disable=W0613
                 path_for_doc_fn=None,
                 max_file_size=None):
        """Set up a GitAction object for a Git repository of 'nexson' documents.

        All arguments are handed on unchanged to `GitActionBase.__init__`; this
        constructor only fixes the document type to 'nexson' and supplies
        `get_filepath_for_namespaced_id` as the `path_for_doc_id_fn`.

        Example:
        gd   = PhylesystemGitAction(repo="/home/user/git/foo")

        Note that write access to the git repository directory is required,
        so that a lockfile can be created in the .git directory.
        """
        # Positional arguments, in the order GitActionBase.__init__ receives them
        # after the document type.
        forwarded = (repo, remote, git_ssh, pkey, cache, path_for_doc_fn, max_file_size)
        GitActionBase.__init__(self,
                               'nexson',
                               *forwarded,
                               path_for_doc_id_fn=get_filepath_for_namespaced_id)
Example #2
0
    def __init__(self,
                 repo,
                 remote=None,
                 git_ssh=None,
                 pkey=None,
                 cache=None,  # pylint: disable=W0613
                 path_for_doc_fn=None,
                 max_file_size=None):
        """Set up a GitActionBase subclass for a repository of 'amendment' documents.

        All arguments are handed on unchanged to `GitActionBase.__init__`; this
        constructor only fixes the document type to 'amendment' and supplies
        `get_filepath_for_id` as the `path_for_doc_id_fn`.

        Example:
        gd   = TaxonomicAmendmentsGitAction(repo="/home/user/git/foo")

        Note that write access to the git repository directory is required,
        so that a lockfile can be created in the .git directory.
        """
        # Positional arguments, in the order GitActionBase.__init__ receives them
        # after the document type.
        forwarded = (repo, remote, git_ssh, pkey, cache, path_for_doc_fn, max_file_size)
        GitActionBase.__init__(self,
                               'amendment',
                               *forwarded,
                               path_for_doc_id_fn=get_filepath_for_id)
Example #3
0
    def __init__(
            self,
            repo,
            remote=None,
            git_ssh=None,
            pkey=None,
            cache=None,  # pylint: disable=W0613
            path_for_doc_fn=None,
            max_file_size=None):
        """Set up a GitActionBase subclass for a repository of 'collection' documents.

        All arguments are handed on unchanged to `GitActionBase.__init__`; this
        constructor only fixes the document type to 'collection' and supplies
        `get_filepath_for_id` as the `path_for_doc_id_fn`.

        Example:
        gd   = TreeCollectionsGitAction(repo="/home/user/git/foo")

        Note that write access to the git repository directory is required,
        so that a lockfile can be created in the .git directory.
        """
        # Positional arguments, in the order GitActionBase.__init__ receives them
        # after the document type.
        forwarded = (repo, remote, git_ssh, pkey, cache, path_for_doc_fn, max_file_size)
        GitActionBase.__init__(
            self,
            'collection',
            *forwarded,
            path_for_doc_id_fn=get_filepath_for_id)
Example #4
0
    def __init__(
            self,
            prefix_from_doc_id,
            repos_dict=None,
            repos_par=None,
            with_caching=True,
            assumed_doc_version=None,
            git_ssh=None,
            pkey=None,
            git_action_class=None,  # requires a *type-specific* GitActionBase subclass
            git_shard_class=None,  # requires a *type-specific* GitShard subclass
            mirror_info=None,
            new_doc_prefix=None,
            infrastructure_commit_author='OpenTree API <*****@*****.**>',
            **kwargs):
        """Create one shard per repository and index the document IDs they hold.

        Repos can be found by passing in a `repos_par` (a directory that is the parent of the repos)
            or by trusting the `repos_dict` mapping of name to repo filepath. If `repos_dict` is
            None, `get_repos(repos_par, **kwargs)` is used to build it.
        `prefix_from_doc_id` should be a type-specific method defined in the subclass
        `with_caching` should be True for non-debugging uses (it is accepted here but not read
            by this method).
        `assumed_doc_version` is optional. If specified all shard repos are assumed to store
            files of this version of the primary document syntax. If None, the value is taken
            from the last shard (if any) after the shards are created.
        `git_ssh` is the path of an executable for git-ssh operations.
        `pkey` is the PKEY that has to be in the env for remote, authenticated operations to work
        `git_action_class` is a subclass of GitActionBase to use. the __init__ syntax must be compatible
            with PhylesystemGitAction
        `git_shard_class` is called once per repo, with positional arguments (repo name, repo
            filepath, assumed_doc_version, git_ssh, pkey, git_action_class, push mirror path,
            new_doc_prefix, infrastructure_commit_author).
        If you want to use a mirrors of the repo for pushes or pulls, send in a `mirror_info` dict:
            mirror_info['push'] and mirror_info['pull'] should be dicts with the following keys:
            'parent_dir' - the parent directory of the mirrored repos
            'remote_map' - a dictionary of remote name to prefix (the repo name + '.git' will be
                appended to create the URL for pushing).
            Only mirror_info['push'] is read by this method.

        Repos for which `git_shard_class` raises FailedShardCreationError are logged and skipped.

        Raises ValueError if the push mirror parent exists but is not a directory, if cloning a
        missing mirror does not produce the expected directory, or if 'remote_map' uses the
        protected remote names 'origin' or 'originssh' while a missing mirror is being set up.
        """
        from peyotl.phylesystem.helper import get_repos, _get_phylesystem_parent_with_source
        ShardedDocStore.__init__(self, prefix_from_doc_id=prefix_from_doc_id)
        self.assumed_doc_version = assumed_doc_version
        # Keep a human-readable description of where the repo locations came from.
        if repos_dict is not None:
            self._filepath_args = 'repos_dict = {}'.format(repr(repos_dict))
        elif repos_par is not None:
            self._filepath_args = 'repos_par = {}'.format(repr(repos_par))
        else:
            fmt = '<No arg> default phylesystem_parent from {}'
            a = _get_phylesystem_parent_with_source(**kwargs)[1]
            self._filepath_args = fmt.format(a)
        push_mirror_repos_par = None
        push_mirror_remote_map = {}
        if mirror_info:
            push_mirror_info = mirror_info.get('push', {})
            if push_mirror_info:
                push_mirror_repos_par = push_mirror_info['parent_dir']
                push_mirror_remote_map = push_mirror_info.get('remote_map', {})
                if push_mirror_repos_par:
                    if not os.path.exists(push_mirror_repos_par):
                        os.makedirs(push_mirror_repos_par)
                    if not os.path.isdir(push_mirror_repos_par):
                        e_fmt = 'Specified push_mirror_repos_par, "{}", is not a directory'
                        e = e_fmt.format(push_mirror_repos_par)
                        raise ValueError(e)
        if repos_dict is None:
            repos_dict = get_repos(repos_par, **kwargs)
        shards = []
        # Sorted order makes shard order (and thus the choice of growing shard) deterministic.
        for repo_name in sorted(repos_dict.keys()):
            repo_filepath = repos_dict[repo_name]
            push_mirror_repo_path = None
            # Always bound, so the mirror-bootstrapping code below never sees a stale or
            # undefined value.
            expected_push_mirror_repo_path = None
            if push_mirror_repos_par:
                expected_push_mirror_repo_path = os.path.join(
                    push_mirror_repos_par, repo_name)
                if os.path.isdir(expected_push_mirror_repo_path):
                    push_mirror_repo_path = expected_push_mirror_repo_path
            try:
                # assumes uniform __init__ arguments for all GitShard subclasses
                shard = git_shard_class(repo_name, repo_filepath,
                                        assumed_doc_version, git_ssh, pkey,
                                        git_action_class,
                                        push_mirror_repo_path, new_doc_prefix,
                                        infrastructure_commit_author)
            except FailedShardCreationError as x:
                f = 'SKIPPING repo "{d}" (not a {c}). Details:\n  {e}'
                f = f.format(d=repo_filepath,
                             c=git_shard_class.__name__,
                             e=str(x))
                # Logger.warn is a deprecated alias; warning() is the supported spelling.
                _LOG.warning(f)
                continue
            # if the mirror does not exist, clone it...
            if push_mirror_repos_par and (push_mirror_repo_path is None):
                from peyotl.git_storage import GitActionBase
                GitActionBase.clone_repo(push_mirror_repos_par, repo_name,
                                         repo_filepath)
                if not os.path.isdir(expected_push_mirror_repo_path):
                    e_msg = 'git clone in mirror bootstrapping did not produce a directory at {}'
                    e = e_msg.format(expected_push_mirror_repo_path)
                    raise ValueError(e)
                for remote_name, remote_url_prefix in push_mirror_remote_map.items():
                    if remote_name in ['origin', 'originssh']:
                        f = '"{}" is a protected remote name in the mirrored repo setup'
                        m = f.format(remote_name)
                        raise ValueError(m)
                    remote_url = remote_url_prefix + '/' + repo_name + '.git'
                    GitActionBase.add_remote(expected_push_mirror_repo_path,
                                             remote_name, remote_url)
                shard.push_mirror_repo_path = expected_push_mirror_repo_path
                # Fetch from each newly added remote of the fresh mirror.
                for remote_name in push_mirror_remote_map:
                    mga = shard._create_git_action_for_mirror()  # pylint: disable=W0212
                    mga.fetch(remote_name)
            shards.append(shard)

        self._shards = shards
        # The last shard (in sorted repo-name order) is the one that grows.
        self._growing_shard = shards[-1] if shards else None  # generalize with config...
        self._prefix2shard = {}
        for shard in shards:
            for prefix in shard.known_prefixes:
                # we don't currently support multiple shards with the same ID prefix scheme
                assert prefix not in self._prefix2shard, \
                    'ID prefix "{}" is claimed by more than one shard'.format(prefix)
                self._prefix2shard[prefix] = shard
        with self._index_lock:
            self._locked_refresh_doc_ids()
        if self.assumed_doc_version is None:
            # if no version was specified, try to pick it up from a shard's contents (using auto-detect)
            if self._growing_shard:
                self.assumed_doc_version = self._growing_shard.assumed_doc_version
        self.git_action_class = git_action_class
    def __init__(self,
                 prefix_from_doc_id,
                 repos_dict=None,
                 repos_par=None,
                 with_caching=True,
                 assumed_doc_version=None,
                 git_ssh=None,
                 pkey=None,
                 git_action_class=None,  # requires a *type-specific* GitActionBase subclass
                 git_shard_class=None,  # requires a *type-specific* GitShard subclass
                 mirror_info=None,
                 new_doc_prefix=None,
                 infrastructure_commit_author='OpenTree API <*****@*****.**>',
                 **kwargs):
        """Create one shard per repository and index the document IDs they hold.

        Repos can be found by passing in a `repos_par` (a directory that is the parent of the repos)
            or by trusting the `repos_dict` mapping of name to repo filepath. If `repos_dict` is
            None, `get_repos(repos_par, **kwargs)` is used to build it.
        `prefix_from_doc_id` should be a type-specific method defined in the subclass
        `with_caching` should be True for non-debugging uses (it is accepted here but not read
            by this method).
        `assumed_doc_version` is optional. If specified all shard repos are assumed to store
            files of this version of the primary document syntax. If None, the value is taken
            from the last shard (if any) after the shards are created.
        `git_ssh` is the path of an executable for git-ssh operations.
        `pkey` is the PKEY that has to be in the env for remote, authenticated operations to work
        `git_action_class` is a subclass of GitActionBase to use. the __init__ syntax must be compatible
            with PhylesystemGitAction
        `git_shard_class` is called once per repo, with positional arguments (repo name, repo
            filepath, assumed_doc_version, git_ssh, pkey, git_action_class, push mirror path,
            new_doc_prefix, infrastructure_commit_author).
        If you want to use a mirrors of the repo for pushes or pulls, send in a `mirror_info` dict:
            mirror_info['push'] and mirror_info['pull'] should be dicts with the following keys:
            'parent_dir' - the parent directory of the mirrored repos
            'remote_map' - a dictionary of remote name to prefix (the repo name + '.git' will be
                appended to create the URL for pushing).
            Only mirror_info['push'] is read by this method.

        Repos for which `git_shard_class` raises FailedShardCreationError are logged and skipped.

        Raises ValueError if the push mirror parent exists but is not a directory, if cloning a
        missing mirror does not produce the expected directory, or if 'remote_map' uses the
        protected remote names 'origin' or 'originssh' while a missing mirror is being set up.
        """
        from peyotl.phylesystem.helper import get_repos, _get_phylesystem_parent_with_source
        ShardedDocStore.__init__(self,
                                 prefix_from_doc_id=prefix_from_doc_id)
        self.assumed_doc_version = assumed_doc_version
        # Keep a human-readable description of where the repo locations came from.
        if repos_dict is not None:
            self._filepath_args = 'repos_dict = {}'.format(repr(repos_dict))
        elif repos_par is not None:
            self._filepath_args = 'repos_par = {}'.format(repr(repos_par))
        else:
            fmt = '<No arg> default phylesystem_parent from {}'
            a = _get_phylesystem_parent_with_source(**kwargs)[1]
            self._filepath_args = fmt.format(a)
        push_mirror_repos_par = None
        push_mirror_remote_map = {}
        if mirror_info:
            push_mirror_info = mirror_info.get('push', {})
            if push_mirror_info:
                push_mirror_repos_par = push_mirror_info['parent_dir']
                push_mirror_remote_map = push_mirror_info.get('remote_map', {})
                if push_mirror_repos_par:
                    if not os.path.exists(push_mirror_repos_par):
                        os.makedirs(push_mirror_repos_par)
                    if not os.path.isdir(push_mirror_repos_par):
                        e_fmt = 'Specified push_mirror_repos_par, "{}", is not a directory'
                        e = e_fmt.format(push_mirror_repos_par)
                        raise ValueError(e)
        if repos_dict is None:
            repos_dict = get_repos(repos_par, **kwargs)
        shards = []
        # Sorted order makes shard order (and thus the choice of growing shard) deterministic.
        for repo_name in sorted(repos_dict.keys()):
            repo_filepath = repos_dict[repo_name]
            push_mirror_repo_path = None
            # Always bound, so the mirror-bootstrapping code below never sees a stale or
            # undefined value.
            expected_push_mirror_repo_path = None
            if push_mirror_repos_par:
                expected_push_mirror_repo_path = os.path.join(push_mirror_repos_par, repo_name)
                if os.path.isdir(expected_push_mirror_repo_path):
                    push_mirror_repo_path = expected_push_mirror_repo_path
            try:
                # assumes uniform __init__ arguments for all GitShard subclasses
                shard = git_shard_class(repo_name,
                                        repo_filepath,
                                        assumed_doc_version,
                                        git_ssh,
                                        pkey,
                                        git_action_class,
                                        push_mirror_repo_path,
                                        new_doc_prefix,
                                        infrastructure_commit_author)
            except FailedShardCreationError as x:
                f = 'SKIPPING repo "{d}" (not a {c}). Details:\n  {e}'
                f = f.format(d=repo_filepath, c=git_shard_class.__name__, e=str(x))
                # Logger.warn is a deprecated alias; warning() is the supported spelling.
                _LOG.warning(f)
                continue
            # if the mirror does not exist, clone it...
            if push_mirror_repos_par and (push_mirror_repo_path is None):
                from peyotl.git_storage import GitActionBase
                GitActionBase.clone_repo(push_mirror_repos_par,
                                         repo_name,
                                         repo_filepath)
                if not os.path.isdir(expected_push_mirror_repo_path):
                    e_msg = 'git clone in mirror bootstrapping did not produce a directory at {}'
                    e = e_msg.format(expected_push_mirror_repo_path)
                    raise ValueError(e)
                for remote_name, remote_url_prefix in push_mirror_remote_map.items():
                    if remote_name in ['origin', 'originssh']:
                        f = '"{}" is a protected remote name in the mirrored repo setup'
                        m = f.format(remote_name)
                        raise ValueError(m)
                    remote_url = remote_url_prefix + '/' + repo_name + '.git'
                    GitActionBase.add_remote(expected_push_mirror_repo_path, remote_name, remote_url)
                shard.push_mirror_repo_path = expected_push_mirror_repo_path
                # Fetch from each newly added remote of the fresh mirror.
                for remote_name in push_mirror_remote_map:
                    mga = shard._create_git_action_for_mirror()  # pylint: disable=W0212
                    mga.fetch(remote_name)
            shards.append(shard)

        self._shards = shards
        # The last shard (in sorted repo-name order) is the one that grows.
        self._growing_shard = shards[-1] if shards else None  # generalize with config...
        self._prefix2shard = {}
        for shard in shards:
            for prefix in shard.known_prefixes:
                # we don't currently support multiple shards with the same ID prefix scheme
                assert prefix not in self._prefix2shard, \
                    'ID prefix "{}" is claimed by more than one shard'.format(prefix)
                self._prefix2shard[prefix] = shard
        with self._index_lock:
            self._locked_refresh_doc_ids()
        if self.assumed_doc_version is None:
            # if no version was specified, try to pick it up from a shard's contents (using auto-detect)
            if self._growing_shard:
                self.assumed_doc_version = self._growing_shard.assumed_doc_version
        self.git_action_class = git_action_class