Ejemplo n.º 1
0
    def __init__(self,
                 repos_dict=None,
                 repos_par=None,
                 with_caching=True,
                 repo_nexml2json=None,
                 git_ssh=None,
                 pkey=None,
                 git_action_class=GitAction,
                 mirror_info=None,
                 new_study_prefix=None,
                 infrastructure_commit_author='OpenTree API <*****@*****.**>',
                 **kwargs):
        '''
        Locate the shard repos, wrap each one in a PhylesystemShard, and index their ID prefixes.

        Repos can be found by passing in a `repos_par` (a directory that is the parent of the repos)
            or by trusting the `repos_dict` mapping of name to repo filepath. When `repos_dict` is
            None, `get_repos(repos_par, **kwargs)` supplies the mapping.
        `with_caching` should be True for non-debugging uses. When it is False no cache region is made.
        `repo_nexml2json` is optional. If specified all PhylesystemShard repos are assumed to store
            files of this version of nexson syntax.
        `git_ssh` is the path of an executable for git-ssh operations.
        `pkey` is the PKEY that has to be in the env for remote, authenticated operations to work
        `git_action_class` is a subclass of GitAction to use. the __init__ syntax must be compatible
            with GitAction
        `new_study_prefix` and `infrastructure_commit_author` are passed through to every PhylesystemShard.
        `**kwargs` are forwarded to `get_repos` and `_get_phylesystem_parent_with_source`.
        If you want to use a mirrors of the repo for pushes or pulls, send in a `mirror_info` dict:
            mirror_info['push'] and mirror_info['pull'] should be dicts with the following keys:
            'parent_dir' - the parent directory of the mirrored repos
            'remote_map' - a dictionary of remote name to prefix (the repo name + '.git' will be
                appended to create the URL for pushing).
            Only the 'push' entry is read by this constructor.

        Repos are visited in sorted order of their names. A repo that raises NotAPhylesystemShardError
            is logged and skipped. The last shard created becomes the "growing" shard; if no shard
            could be created, `_growing_shard` is None and `repo_nexml2json` keeps the value passed in.

        Raises ValueError if the push mirror parent exists but is not a directory, if `remote_map` uses
            the protected remote names 'origin' or 'originssh' when a mirror has to be bootstrapped,
            or if cloning a missing mirror does not produce the expected directory.
        '''
        _PhylesystemBase.__init__(self)
        # Record a description of how the repo locations were specified.
        if repos_dict is not None:
            self._filepath_args = 'repos_dict = {}'.format(repr(repos_dict))
        elif repos_par is not None:
            self._filepath_args = 'repos_par = {}'.format(repr(repos_par))
        else:
            fmt = '<No arg> default phylesystem_parent from {}'
            a = _get_phylesystem_parent_with_source(**kwargs)[1]
            self._filepath_args = fmt.format(a)
        push_mirror_repos_par = None
        push_mirror_remote_map = {}
        if mirror_info:
            push_mirror_info = mirror_info.get('push', {})
            if push_mirror_info:
                push_mirror_repos_par = push_mirror_info['parent_dir']
                push_mirror_remote_map = push_mirror_info.get('remote_map', {})
                if push_mirror_repos_par:
                    if not os.path.exists(push_mirror_repos_par):
                        os.makedirs(push_mirror_repos_par)
                    if not os.path.isdir(push_mirror_repos_par):
                        e_fmt = 'Specified push_mirror_repos_par, "{}", is not a directory'
                        e = e_fmt.format(push_mirror_repos_par)
                        raise ValueError(e)
        if repos_dict is None:
            repos_dict = get_repos(repos_par, **kwargs)
        shards = []
        for repo_name in sorted(repos_dict.keys()):
            repo_filepath = repos_dict[repo_name]
            push_mirror_repo_path = None
            expected_push_mirror_repo_path = None
            if push_mirror_repos_par:
                expected_push_mirror_repo_path = os.path.join(push_mirror_repos_par, repo_name)
                if os.path.isdir(expected_push_mirror_repo_path):
                    push_mirror_repo_path = expected_push_mirror_repo_path
            try:
                shard = PhylesystemShard(repo_name,
                                         repo_filepath,
                                         git_ssh=git_ssh,
                                         pkey=pkey,
                                         repo_nexml2json=repo_nexml2json,
                                         git_action_class=git_action_class,
                                         push_mirror_repo_path=push_mirror_repo_path,
                                         new_study_prefix=new_study_prefix,
                                         infrastructure_commit_author=infrastructure_commit_author)
            except NotAPhylesystemShardError as x:
                f = 'Git repo "{d}" found in your phylesystem parent, but it does not appear to be a phylesystem ' \
                    'shard. Please report this as a bug if this directory is supposed to be phylesystem shard. '\
                    'The triggering error message was:\n{e}'
                f = f.format(d=repo_filepath, e=str(x))
                # Logger.warn is a deprecated alias of Logger.warning
                _LOG.warning(f)
                continue
            # if the mirror does not exist, clone it...
            if push_mirror_repos_par and (push_mirror_repo_path is None):
                # Reject protected remote names *before* cloning. Otherwise the failed run leaves a
                #   mirror directory behind, and the next run treats the mirror as already set up
                #   and never adds the remotes.
                for remote_name in push_mirror_remote_map:
                    if remote_name in ('origin', 'originssh'):
                        f = '"{}" is a protected remote name in the mirrored repo setup'
                        m = f.format(remote_name)
                        raise ValueError(m)
                GitAction.clone_repo(push_mirror_repos_par,
                                     repo_name,
                                     repo_filepath)
                if not os.path.isdir(expected_push_mirror_repo_path):
                    e_msg = 'git clone in mirror bootstrapping did not produce a directory at {}'
                    e = e_msg.format(expected_push_mirror_repo_path)
                    raise ValueError(e)
                for remote_name, remote_url_prefix in push_mirror_remote_map.items():
                    remote_url = remote_url_prefix + '/' + repo_name + '.git'
                    GitAction.add_remote(expected_push_mirror_repo_path, remote_name, remote_url)
                # the shard was created without a mirror path, so point it at the fresh clone
                shard.push_mirror_repo_path = expected_push_mirror_repo_path
                for remote_name in push_mirror_remote_map:
                    mga = shard._create_git_action_for_mirror() #pylint: disable=W0212
                    mga.fetch(remote_name)
            shards.append(shard)

        self._shards = shards
        # With no usable shard, shards[-1] would raise IndexError; fall back to None instead.
        self._growing_shard = shards[-1] if shards else None # generalize with config...
        self._prefix2shard = {}
        for shard in shards:
            for prefix in shard.known_prefixes:
                assert prefix not in self._prefix2shard # we don't currently support multiple shards with the same ID prefix scheme
                self._prefix2shard[prefix] = shard
        with self._index_lock:
            self._locked_refresh_study_ids()
        if self._growing_shard is None:
            # no shard to take the syntax version from; keep what the caller specified (may be None)
            self.repo_nexml2json = repo_nexml2json
        else:
            self.repo_nexml2json = self._growing_shard.repo_nexml2json
        if with_caching:
            self._cache_region = _make_phylesystem_cache_region()
        else:
            self._cache_region = None
        self.git_action_class = git_action_class
        self._cache_hits = 0
Ejemplo n.º 2
0
    def __init__(self,
                 prefix_from_doc_id,
                 repos_dict=None,
                 repos_par=None,
                 with_caching=True,
                 assumed_doc_version=None,
                 git_ssh=None,
                 pkey=None,
                 git_action_class=None,  # requires a *type-specific* GitActionBase subclass
                 git_shard_class=None,  # requires a *type-specific* GitShard subclass
                 mirror_info=None,
                 new_doc_prefix=None,
                 infrastructure_commit_author='OpenTree API <*****@*****.**>',
                 **kwargs):
        """
        Locate the shard repos, wrap each one in a `git_shard_class` instance, and index their ID prefixes.

        Repos can be found by passing in a `repos_par` (a directory that is the parent of the repos)
            or by trusting the `repos_dict` mapping of name to repo filepath. When `repos_dict` is
            None, `get_repos(repos_par, **kwargs)` supplies the mapping.
        `prefix_from_doc_id` should be a type-specific method defined in the subclass
        `with_caching` should be True for non-debugging uses. (It is accepted but not read in this body.)
        `assumed_doc_version` is optional. If specified all shard repos are assumed to store
            files of this version of the primary document syntax. If None, the value is taken from
            the last shard created (when there is one).
        `git_ssh` is the path of an executable for git-ssh operations.
        `pkey` is the PKEY that has to be in the env for remote, authenticated operations to work
        `git_action_class` is a subclass of GitActionBase to use. the __init__ syntax must be compatible
            with PhylesystemGitAction
        `git_shard_class` is called positionally with (repo_name, repo_filepath, assumed_doc_version,
            git_ssh, pkey, git_action_class, push_mirror_repo_path, new_doc_prefix,
            infrastructure_commit_author) for every repo.
        `**kwargs` are forwarded to `get_repos` and `_get_phylesystem_parent_with_source`.
        If you want to use a mirrors of the repo for pushes or pulls, send in a `mirror_info` dict:
            mirror_info['push'] and mirror_info['pull'] should be dicts with the following keys:
            'parent_dir' - the parent directory of the mirrored repos
            'remote_map' - a dictionary of remote name to prefix (the repo name + '.git' will be
                appended to create the URL for pushing).
            Only the 'push' entry is read by this constructor.

        Repos are visited in sorted order of their names. A repo that raises FailedShardCreationError
            is logged and skipped. The last shard created becomes the "growing" shard (None if no
            shard was created).

        Raises ValueError if the push mirror parent exists but is not a directory, if `remote_map` uses
            the protected remote names 'origin' or 'originssh' when a mirror has to be bootstrapped,
            or if cloning a missing mirror does not produce the expected directory.
        """
        from peyotl.phylesystem.helper import get_repos, _get_phylesystem_parent_with_source
        ShardedDocStore.__init__(self,
                                 prefix_from_doc_id=prefix_from_doc_id)
        self.assumed_doc_version = assumed_doc_version
        # Record a description of how the repo locations were specified.
        if repos_dict is not None:
            self._filepath_args = 'repos_dict = {}'.format(repr(repos_dict))
        elif repos_par is not None:
            self._filepath_args = 'repos_par = {}'.format(repr(repos_par))
        else:
            fmt = '<No arg> default phylesystem_parent from {}'
            a = _get_phylesystem_parent_with_source(**kwargs)[1]
            self._filepath_args = fmt.format(a)
        push_mirror_repos_par = None
        push_mirror_remote_map = {}
        if mirror_info:
            push_mirror_info = mirror_info.get('push', {})
            if push_mirror_info:
                push_mirror_repos_par = push_mirror_info['parent_dir']
                push_mirror_remote_map = push_mirror_info.get('remote_map', {})
                if push_mirror_repos_par:
                    if not os.path.exists(push_mirror_repos_par):
                        os.makedirs(push_mirror_repos_par)
                    if not os.path.isdir(push_mirror_repos_par):
                        e_fmt = 'Specified push_mirror_repos_par, "{}", is not a directory'
                        e = e_fmt.format(push_mirror_repos_par)
                        raise ValueError(e)
        if repos_dict is None:
            repos_dict = get_repos(repos_par, **kwargs)
        shards = []
        for repo_name in sorted(repos_dict.keys()):
            repo_filepath = repos_dict[repo_name]
            push_mirror_repo_path = None
            expected_push_mirror_repo_path = None
            if push_mirror_repos_par:
                expected_push_mirror_repo_path = os.path.join(push_mirror_repos_par, repo_name)
                if os.path.isdir(expected_push_mirror_repo_path):
                    push_mirror_repo_path = expected_push_mirror_repo_path
            try:
                # assumes uniform __init__ arguments for all GitShard subclasses
                shard = git_shard_class(repo_name,
                                        repo_filepath,
                                        assumed_doc_version,
                                        git_ssh,
                                        pkey,
                                        git_action_class,
                                        push_mirror_repo_path,
                                        new_doc_prefix,
                                        infrastructure_commit_author)
            except FailedShardCreationError as x:
                f = 'SKIPPING repo "{d}" (not a {c}). Details:\n  {e}'
                f = f.format(d=repo_filepath, c=git_shard_class.__name__, e=str(x))
                # Logger.warn is a deprecated alias of Logger.warning
                _LOG.warning(f)
                continue
            # if the mirror does not exist, clone it...
            if push_mirror_repos_par and (push_mirror_repo_path is None):
                # Reject protected remote names *before* cloning. Otherwise the failed run leaves a
                #   mirror directory behind, and the next run treats the mirror as already set up
                #   and never adds the remotes.
                for remote_name in push_mirror_remote_map:
                    if remote_name in ('origin', 'originssh'):
                        f = '"{}" is a protected remote name in the mirrored repo setup'
                        m = f.format(remote_name)
                        raise ValueError(m)
                from peyotl.git_storage import GitActionBase
                GitActionBase.clone_repo(push_mirror_repos_par,
                                         repo_name,
                                         repo_filepath)
                if not os.path.isdir(expected_push_mirror_repo_path):
                    e_msg = 'git clone in mirror bootstrapping did not produce a directory at {}'
                    e = e_msg.format(expected_push_mirror_repo_path)
                    raise ValueError(e)
                for remote_name, remote_url_prefix in push_mirror_remote_map.items():
                    remote_url = remote_url_prefix + '/' + repo_name + '.git'
                    GitActionBase.add_remote(expected_push_mirror_repo_path, remote_name, remote_url)
                # the shard was created without a mirror path, so point it at the fresh clone
                shard.push_mirror_repo_path = expected_push_mirror_repo_path
                for remote_name in push_mirror_remote_map:
                    mga = shard._create_git_action_for_mirror()  # pylint: disable=W0212
                    mga.fetch(remote_name)
            shards.append(shard)

        self._shards = shards
        # the last shard (in sorted repo-name order) is the growing one; None when there are no shards
        self._growing_shard = shards[-1] if shards else None  # generalize with config...
        self._prefix2shard = {}
        for shard in shards:
            for prefix in shard.known_prefixes:
                # we don't currently support multiple shards with the same ID prefix scheme
                assert prefix not in self._prefix2shard
                self._prefix2shard[prefix] = shard
        with self._index_lock:
            self._locked_refresh_doc_ids()
        # if no version was specified, try to pick it up from a shard's contents (using auto-detect)
        if self.assumed_doc_version is None and self._growing_shard:
            self.assumed_doc_version = self._growing_shard.assumed_doc_version
        self.git_action_class = git_action_class
Ejemplo n.º 3
0
    def __init__(
            self,
            prefix_from_doc_id,
            repos_dict=None,
            repos_par=None,
            with_caching=True,
            assumed_doc_version=None,
            git_ssh=None,
            pkey=None,
            git_action_class=None,  # requires a *type-specific* GitActionBase subclass
            git_shard_class=None,  # requires a *type-specific* GitShard subclass
            mirror_info=None,
            new_doc_prefix=None,
            infrastructure_commit_author='OpenTree API <*****@*****.**>',
            **kwargs):
        """
        Build one `git_shard_class` instance per repo and index the ID prefixes each shard knows.

        Repos can be found by passing in a `repos_par` (a directory that is the parent of the repos)
            or by trusting the `repos_dict` mapping of name to repo filepath. When `repos_dict` is
            None, `get_repos(repos_par, **kwargs)` supplies the mapping.
        `prefix_from_doc_id` should be a type-specific method defined in the subclass
        `with_caching` should be True for non-debugging uses. (It is accepted but not read in this body.)
        `assumed_doc_version` is optional. If specified all shard repos are assumed to store
            files of this version of the primary document syntax. If None, the value is taken from
            the last shard created (when there is one).
        `git_ssh` is the path of an executable for git-ssh operations.
        `pkey` is the PKEY that has to be in the env for remote, authenticated operations to work
        `git_action_class` is a subclass of GitActionBase to use. the __init__ syntax must be compatible
            with PhylesystemGitAction
        `git_shard_class` is called positionally with (repo_name, repo_filepath, assumed_doc_version,
            git_ssh, pkey, git_action_class, push_mirror_repo_path, new_doc_prefix,
            infrastructure_commit_author) for every repo.
        `**kwargs` are forwarded to `get_repos` and `_get_phylesystem_parent_with_source`.
        If you want to use a mirrors of the repo for pushes or pulls, send in a `mirror_info` dict:
            mirror_info['push'] and mirror_info['pull'] should be dicts with the following keys:
            'parent_dir' - the parent directory of the mirrored repos
            'remote_map' - a dictionary of remote name to prefix (the repo name + '.git' will be
                appended to create the URL for pushing).
            Only the 'push' entry is read by this constructor.

        Repos are visited in sorted order of their names. A repo that raises FailedShardCreationError
            is logged and skipped. The last shard created becomes the "growing" shard (None if no
            shard was created).

        Raises ValueError if the push mirror parent exists but is not a directory, if `remote_map` uses
            the protected remote names 'origin' or 'originssh' when a mirror has to be bootstrapped,
            or if cloning a missing mirror does not produce the expected directory.
        """
        from peyotl.phylesystem.helper import get_repos, _get_phylesystem_parent_with_source
        ShardedDocStore.__init__(self, prefix_from_doc_id=prefix_from_doc_id)
        self.assumed_doc_version = assumed_doc_version
        # Record a description of how the repo locations were specified.
        if repos_dict is not None:
            self._filepath_args = 'repos_dict = {}'.format(repr(repos_dict))
        elif repos_par is not None:
            self._filepath_args = 'repos_par = {}'.format(repr(repos_par))
        else:
            fmt = '<No arg> default phylesystem_parent from {}'
            a = _get_phylesystem_parent_with_source(**kwargs)[1]
            self._filepath_args = fmt.format(a)
        push_mirror_repos_par = None
        push_mirror_remote_map = {}
        if mirror_info:
            push_mirror_info = mirror_info.get('push', {})
            if push_mirror_info:
                push_mirror_repos_par = push_mirror_info['parent_dir']
                push_mirror_remote_map = push_mirror_info.get('remote_map', {})
                if push_mirror_repos_par:
                    if not os.path.exists(push_mirror_repos_par):
                        os.makedirs(push_mirror_repos_par)
                    if not os.path.isdir(push_mirror_repos_par):
                        e_fmt = 'Specified push_mirror_repos_par, "{}", is not a directory'
                        e = e_fmt.format(push_mirror_repos_par)
                        raise ValueError(e)
        if repos_dict is None:
            repos_dict = get_repos(repos_par, **kwargs)
        shards = []
        for repo_name in sorted(repos_dict.keys()):
            repo_filepath = repos_dict[repo_name]
            push_mirror_repo_path = None
            expected_push_mirror_repo_path = None
            if push_mirror_repos_par:
                expected_push_mirror_repo_path = os.path.join(
                    push_mirror_repos_par, repo_name)
                if os.path.isdir(expected_push_mirror_repo_path):
                    push_mirror_repo_path = expected_push_mirror_repo_path
            try:
                # assumes uniform __init__ arguments for all GitShard subclasses
                shard = git_shard_class(repo_name, repo_filepath,
                                        assumed_doc_version, git_ssh, pkey,
                                        git_action_class,
                                        push_mirror_repo_path, new_doc_prefix,
                                        infrastructure_commit_author)
            except FailedShardCreationError as x:
                f = 'SKIPPING repo "{d}" (not a {c}). Details:\n  {e}'
                f = f.format(d=repo_filepath,
                             c=git_shard_class.__name__,
                             e=str(x))
                # Logger.warn is a deprecated alias of Logger.warning
                _LOG.warning(f)
                continue
            # if the mirror does not exist, clone it...
            if push_mirror_repos_par and (push_mirror_repo_path is None):
                # Check the remote names up front: raising after the clone would leave a mirror
                #   directory on disk, so a later run would find it, assume the mirror is complete,
                #   and never register the remotes.
                for remote_name in push_mirror_remote_map:
                    if remote_name in ('origin', 'originssh'):
                        f = '"{}" is a protected remote name in the mirrored repo setup'
                        m = f.format(remote_name)
                        raise ValueError(m)
                from peyotl.git_storage import GitActionBase
                GitActionBase.clone_repo(push_mirror_repos_par, repo_name,
                                         repo_filepath)
                if not os.path.isdir(expected_push_mirror_repo_path):
                    e_msg = 'git clone in mirror bootstrapping did not produce a directory at {}'
                    e = e_msg.format(expected_push_mirror_repo_path)
                    raise ValueError(e)
                for remote_name, remote_url_prefix in push_mirror_remote_map.items():
                    remote_url = remote_url_prefix + '/' + repo_name + '.git'
                    GitActionBase.add_remote(expected_push_mirror_repo_path,
                                             remote_name, remote_url)
                # the shard was created without a mirror path, so point it at the fresh clone
                shard.push_mirror_repo_path = expected_push_mirror_repo_path
                for remote_name in push_mirror_remote_map:
                    mga = shard._create_git_action_for_mirror()  # pylint: disable=W0212
                    mga.fetch(remote_name)
            shards.append(shard)

        self._shards = shards
        # the last shard (in sorted repo-name order) is the growing one; None when there are no shards
        self._growing_shard = shards[-1] if shards else None  # generalize with config...
        self._prefix2shard = {}
        for shard in shards:
            for prefix in shard.known_prefixes:
                # we don't currently support multiple shards with the same ID prefix scheme
                assert prefix not in self._prefix2shard
                self._prefix2shard[prefix] = shard
        with self._index_lock:
            self._locked_refresh_doc_ids()
        # if no version was specified, try to pick it up from a shard's contents (using auto-detect)
        if self.assumed_doc_version is None and self._growing_shard:
            self.assumed_doc_version = self._growing_shard.assumed_doc_version
        self.git_action_class = git_action_class
Ejemplo n.º 4
0
    def __init__(
            self,
            repos_dict=None,
            repos_par=None,
            with_caching=True,
            repo_nexml2json=None,
            git_ssh=None,
            pkey=None,
            git_action_class=GitAction,
            mirror_info=None,
            new_study_prefix=None,
            infrastructure_commit_author='OpenTree API <*****@*****.**>',
            **kwargs):
        '''
        Locate the shard repos, wrap each one in a PhylesystemShard, and index their ID prefixes.

        Repos can be found by passing in a `repos_par` (a directory that is the parent of the repos)
            or by trusting the `repos_dict` mapping of name to repo filepath. When `repos_dict` is
            None, `get_repos(repos_par, **kwargs)` supplies the mapping.
        `with_caching` should be True for non-debugging uses. When it is False no cache region is made.
        `repo_nexml2json` is optional. If specified all PhylesystemShard repos are assumed to store
            files of this version of nexson syntax.
        `git_ssh` is the path of an executable for git-ssh operations.
        `pkey` is the PKEY that has to be in the env for remote, authenticated operations to work
        `git_action_class` is a subclass of GitAction to use. the __init__ syntax must be compatible
            with GitAction
        `new_study_prefix` and `infrastructure_commit_author` are passed through to every PhylesystemShard.
        `**kwargs` are forwarded to `get_repos` and `_get_phylesystem_parent_with_source`.
        If you want to use a mirrors of the repo for pushes or pulls, send in a `mirror_info` dict:
            mirror_info['push'] and mirror_info['pull'] should be dicts with the following keys:
            'parent_dir' - the parent directory of the mirrored repos
            'remote_map' - a dictionary of remote name to prefix (the repo name + '.git' will be
                appended to create the URL for pushing).
            Only the 'push' entry is read by this constructor.

        Repos are visited in sorted order of their names; any exception raised while creating a
            PhylesystemShard propagates to the caller. The last shard created becomes the "growing"
            shard; if the repo mapping is empty, `_growing_shard` is None and `repo_nexml2json`
            keeps the value passed in.

        Raises ValueError if the push mirror parent exists but is not a directory, if `remote_map` uses
            the protected remote names 'origin' or 'originssh' when a mirror has to be bootstrapped,
            or if cloning a missing mirror does not produce the expected directory.
        '''
        _PhylesystemBase.__init__(self)
        # Record a description of how the repo locations were specified.
        if repos_dict is not None:
            self._filepath_args = 'repos_dict = {}'.format(repr(repos_dict))
        elif repos_par is not None:
            self._filepath_args = 'repos_par = {}'.format(repr(repos_par))
        else:
            fmt = '<No arg> default phylesystem_parent from {}'
            a = _get_phylesystem_parent_with_source(**kwargs)[1]
            self._filepath_args = fmt.format(a)
        push_mirror_repos_par = None
        push_mirror_remote_map = {}
        if mirror_info:
            push_mirror_info = mirror_info.get('push', {})
            if push_mirror_info:
                push_mirror_repos_par = push_mirror_info['parent_dir']
                push_mirror_remote_map = push_mirror_info.get('remote_map', {})
                if push_mirror_repos_par:
                    if not os.path.exists(push_mirror_repos_par):
                        os.makedirs(push_mirror_repos_par)
                    if not os.path.isdir(push_mirror_repos_par):
                        e_fmt = 'Specified push_mirror_repos_par, "{}", is not a directory'
                        e = e_fmt.format(push_mirror_repos_par)
                        raise ValueError(e)
        if repos_dict is None:
            repos_dict = get_repos(repos_par, **kwargs)
        shards = []
        for repo_name in sorted(repos_dict.keys()):
            repo_filepath = repos_dict[repo_name]
            push_mirror_repo_path = None
            expected_push_mirror_repo_path = None
            if push_mirror_repos_par:
                expected_push_mirror_repo_path = os.path.join(
                    push_mirror_repos_par, repo_name)
                if os.path.isdir(expected_push_mirror_repo_path):
                    push_mirror_repo_path = expected_push_mirror_repo_path
            shard = PhylesystemShard(
                repo_name,
                repo_filepath,
                git_ssh=git_ssh,
                pkey=pkey,
                repo_nexml2json=repo_nexml2json,
                git_action_class=git_action_class,
                push_mirror_repo_path=push_mirror_repo_path,
                new_study_prefix=new_study_prefix,
                infrastructure_commit_author=infrastructure_commit_author)
            # if the mirror does not exist, clone it...
            if push_mirror_repos_par and (push_mirror_repo_path is None):
                # Reject protected remote names *before* cloning. Otherwise the failed run leaves a
                #   mirror directory behind, and the next run treats the mirror as already set up
                #   and never adds the remotes.
                for remote_name in push_mirror_remote_map:
                    if remote_name in ('origin', 'originssh'):
                        f = '"{}" is a protected remote name in the mirrored repo setup'
                        m = f.format(remote_name)
                        raise ValueError(m)
                GitAction.clone_repo(push_mirror_repos_par, repo_name,
                                     repo_filepath)
                if not os.path.isdir(expected_push_mirror_repo_path):
                    e_msg = 'git clone in mirror bootstrapping did not produce a directory at {}'
                    e = e_msg.format(expected_push_mirror_repo_path)
                    raise ValueError(e)
                for remote_name, remote_url_prefix in push_mirror_remote_map.items():
                    remote_url = remote_url_prefix + '/' + repo_name + '.git'
                    GitAction.add_remote(expected_push_mirror_repo_path,
                                         remote_name, remote_url)
                # the shard was created without a mirror path, so point it at the fresh clone
                shard.push_mirror_repo_path = expected_push_mirror_repo_path
                for remote_name in push_mirror_remote_map:
                    mga = shard._create_git_action_for_mirror()  #pylint: disable=W0212
                    mga.fetch(remote_name)
            shards.append(shard)

        self._shards = shards
        # With an empty repo mapping, shards[-1] would raise IndexError; fall back to None instead.
        self._growing_shard = shards[-1] if shards else None  # generalize with config...
        self._prefix2shard = {}
        for shard in shards:
            for prefix in shard.known_prefixes:
                assert prefix not in self._prefix2shard  # we don't currently support multiple shards with the same ID prefix scheme
                self._prefix2shard[prefix] = shard
        with self._index_lock:
            self._locked_refresh_study_ids()
        if self._growing_shard is None:
            # no shard to take the syntax version from; keep what the caller specified (may be None)
            self.repo_nexml2json = repo_nexml2json
        else:
            self.repo_nexml2json = self._growing_shard.repo_nexml2json
        if with_caching:
            self._cache_region = _make_phylesystem_cache_region()
        else:
            self._cache_region = None
        self.git_action_class = git_action_class
        self._cache_hits = 0