def indexes_generator(indexes_lang):
    """
    Factory that builds the index generator for the glossary language.

    :param indexes_lang: str, key into ``indexes.languages`` selecting the
        extended indexer. A falsy value disables extended indexes.
    :return: ``generate_indexes(title, alts, content, BeautifulSoup)``; it
        returns a str with the concatenated ``<d:index .../>`` elements of
        one entry.
    :raises ValueError: if ``indexes_lang`` is given but no indexer is
        registered under that name.
    """
    from xml.sax.saxutils import escape

    # Callable[[Sequence[str], str], Sequence[str]], or None when extended
    # indexes are disabled.
    indexer = None
    if indexes_lang:
        from . import indexes as idxs
        indexer = idxs.languages.get(indexes_lang, None)
        if not indexer:
            msg = "extended indexes not supported for the specified language: %s.\n"\
                "following languages avaible: %s." %\
                (indexes_lang, ', '.join(idxs.languages))
            log.error(msg)
            raise ValueError(msg)

    def generate_indexes(title, alts, content, BeautifulSoup):
        """
        Render the ``<d:index>`` elements for a single entry.

        :param title: str, main title of the entry
        :param alts: iterable of str, alternative titles
        :param content: entry content, handed to the language indexer
        :param BeautifulSoup: the bs4 module, or a falsy value when it is
            not available
        :return: str, one ``<d:index/>`` for the title itself followed by
            one per normalized index
        """
        def quote_attr(value):
            # Return `value` as a quoted XML attribute value.
            if BeautifulSoup:
                return BeautifulSoup.dammit.EntitySubstitution.substitute_xml(
                    value, True)
            # Fallback without bs4: escape &, < and > plus the double quote,
            # so the value cannot terminate the attribute or inject markup.
            return '"%s"' % escape(value, {'"': "&quot;"})

        indexes = [title]
        indexes.extend(alts)

        quoted_title = quote_attr(title)

        if indexer:
            indexes = set(indexer(indexes, content))

        normal_indexes = set()
        for idx in indexes:
            normal = _normalize.title(idx, BeautifulSoup)
            normal_indexes.add(_normalize.title_long(normal))
            normal_indexes.add(_normalize.title_short(normal))
        # The title itself is always emitted first, so do not repeat it.
        normal_indexes.discard(title)

        # skip empty titles. everything could happen.
        normal_indexes = [s for s in normal_indexes if s.strip()]

        elements = [
            '<d:index d:value=%s d:title=%s/>' % (quoted_title, quoted_title)
        ]
        elements.extend(
            '<d:index d:value=%s d:title=%s/>' % (quote_attr(idx), quoted_title)
            for idx in normal_indexes
        )
        return ''.join(elements)

    return generate_indexes
def indexes_generator(indexes_lang):
    """
    Factory that builds the index generator for the glossary language.

    :param indexes_lang: str, key into ``indexes.languages`` selecting the
        extended indexer. A falsy value disables extended indexes.
    :return: ``generate_indexes(title, alts, content, BeautifulSoup)``; it
        returns a str with the concatenated ``<d:index .../>`` elements of
        one entry.
    :raises ValueError: if ``indexes_lang`` is given but no indexer is
        registered under that name.
    """
    from xml.sax.saxutils import escape

    # Callable[[Sequence[str], str], Sequence[str]], or None when extended
    # indexes are disabled.
    indexer = None
    if indexes_lang:
        from . import indexes as idxs
        indexer = idxs.languages.get(indexes_lang, None)
        if not indexer:
            msg = "extended indexes not supported for the specified language: %s.\n"\
                "following languages avaible: %s." %\
                (indexes_lang, ", ".join(idxs.languages))
            log.error(msg)
            raise ValueError(msg)

    def generate_indexes(title, alts, content, BeautifulSoup):
        """
        Render the ``<d:index>`` elements for a single entry.

        :param title: str, main title of the entry
        :param alts: iterable of str, alternative titles
        :param content: entry content, handed to the language indexer
        :param BeautifulSoup: the bs4 module, or a falsy value when it is
            not available
        :return: str, one ``<d:index/>`` for the title itself followed by
            one per normalized index
        """
        def quote_attr(value):
            # Return `value` as a quoted XML attribute value.
            if BeautifulSoup:
                return BeautifulSoup.dammit.EntitySubstitution.substitute_xml(
                    value, True)
            # Fallback without bs4: escape &, < and > plus the double quote,
            # so the value cannot terminate the attribute or inject markup.
            return '"%s"' % escape(value, {'"': "&quot;"})

        indexes = [title]
        indexes.extend(alts)

        quoted_title = quote_attr(title)

        if indexer:
            indexes = set(indexer(indexes, content))

        normal_indexes = set()
        for idx in indexes:
            normal = _normalize.title(idx, BeautifulSoup)
            normal_indexes.add(_normalize.title_long(normal))
            normal_indexes.add(_normalize.title_short(normal))
        # The title itself is always emitted first, so do not repeat it.
        normal_indexes.discard(title)

        # skip empty titles. everything could happen.
        normal_indexes = [s for s in normal_indexes if s.strip()]

        elements = [
            "<d:index d:value=%s d:title=%s/>" % (quoted_title, quoted_title)
        ]
        elements.extend(
            "<d:index d:value=%s d:title=%s/>" % (quote_attr(idx), quoted_title)
            for idx in normal_indexes
        )
        return "".join(elements)

    return generate_indexes
# This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. """ Russian indexes based on pymorphy. """ from . import languages from pyglossary.plugins.formats_common import log try: import pymorphy2 except ImportError: log.error( """module pymorphy2 is required to build extended Russian indexes. You can download it here: http://pymorphy2.readthedocs.org/en/latest/. Or by running: sudo pip3 install pymorphy2""") raise morphy = pymorphy2.MorphAnalyzer() def ru(titles, _): """ gives a set of all declines, cases and other froms of word `title`. note that it works only if title is one word. :type titles: Sequence[str] :rtype: Set[str] """ indexes = set()
# but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. """ Russian indexes based on pymorphy. """ from . import languages from pyglossary.plugins.formats_common import log try: import pymorphy2 except ImportError: log.error( """module pymorphy2 is required to build extended russian indexes. \ you can download it here: http://pymorphy2.readthedocs.org/en/latest/. \ or run `pip3 install pymorphy2`. """) raise else: morphy = pymorphy2.MorphAnalyzer() def ru(titles, _): """ gives a set of all declines, cases and other froms of word `title`. note that it works only if title is one word. :type titles: Sequence[str] :rtype: Set[str] """
# This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. """ Russian indexes based on pymorphy. """ from . import languages from pyglossary.plugins.formats_common import log try: import pymorphy2 except ImportError: log.error("""module pymorphy2 is required to build extended russian indexes. \ you can download it here: http://pymorphy2.readthedocs.org/en/latest/. \ or run `pip3 install pymorphy2`. """) raise else: morphy = pymorphy2.MorphAnalyzer() def ru(titles, _): """ gives a set of all declines, cases and other froms of word `title`. note that it works only if title is one word. :type titles: Sequence[str] :rtype: Set[str] """ indexes = set()
__all__ = ["languages", "log"]

# Dict[str, Callable[[Sequence[str], str], Sequence[str]]]
#
# Registry of index generators, keyed by language name. Submodules register
# a language by adding a (language name -> function) pair to this mapping.
# A registered function must follow the signature below:
#
# :param titles: flat iterable of the title and alternative titles
# :param content: cleaned entry content
# :return: iterable of indexes (str).
#
# Submodules get hold of the mapping with
#
#     from . import languages
#     # or
#     from appledict.indexes import languages
languages = {}

here = os.path.dirname(os.path.abspath(__file__))

# Import every module found next to this file. A module whose import raises
# ImportError is logged with its traceback and skipped.
for _, module, _ in pkgutil.iter_modules([here]):
    try:
        __import__(".".join((__name__, module)))
    except ImportError:
        log.error(
            "error while importing indexes plugin %s" % module,
            exc_info=True,
        )
# but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. """ Chinese wildcard and pinyin indexes. """ from pyglossary.plugins.formats_common import log import re import bs4 try: import colorize_pinyin as color except ImportError: log.error( """module colorize_pinyin is required to build extended Chinese indexes. You can install it by running: sudo pip3 install colorize-pinyin""") raise from . import languages, log def zh(titles, content): """ Chinese indexes. assuming that content is HTML and pinyin is inside second tag (first is <h1>), we can try to parse pinyin and generate indexes with pinyin subwords separated by whitespaces - pinyin itself - pinyin with diacritics replaced by tone numbers