def test_fill(self):
    """Check that ``textwrap2.fill`` hyphenates words when given a hyphenator.

    The sentence is wrapped to a width of 10 columns with an ``en_US``
    hyphenator, and the result is compared against the expected line
    layout, in which "forever." is split as "for-" / "ever.".
    """
    hyphenator = hyphen.Hyphenator("en_US")
    wrapped = textwrap2.fill("A thing of beauty is a joy forever.",
                             width=10,
                             use_hyphenator=hyphenator)
    # Note that this wrapping depends on the content of the language dictionary
    # The expected text is spelled out line by line: a result wrapped to
    # width=10 is several lines joined by newlines, so a single-line
    # expected value could never match.
    expected = "\n".join([
        "A thing of",
        "beauty is",
        "a joy for-",
        "ever.",
    ])
    self.assertEqual(expected, wrapped)
def __init__(self, filename, lang):
    """Load a text file and compute its basic counts.

    Args:
        filename: Path of the text file to read.
        lang: Language code of the text; ``'en'`` and ``'de'`` are supported.

    Raises:
        ValueError: If ``lang`` is not one of the supported codes.
            ``ValueError`` is a subclass of ``Exception``, so callers that
            catch ``Exception`` keep working.
    """
    # Resolve the hyphenation dictionary first so an unsupported language
    # fails before any file is opened.
    if lang == 'de':
        locale = 'de_DE'
    elif lang == 'en':
        locale = 'en_US'
    else:
        raise ValueError(
            "Requested language not supported (Supported: 'en', 'de')."
        )
    self.hyphenation = hyphen.Hyphenator(locale)
    self.lang = lang

    # NOTE(review): the file is decoded with the platform default encoding;
    # confirm this matches the input files, since the word pattern below
    # relies on German umlauts being decoded correctly.
    with open(filename, 'r') as content_file:
        self.content = content_file.read()

    # A word is a run of ASCII letters and German umlauts / sharp s.
    self.words = re.findall(r'[a-zA-ZäöüÄÖÜß]+', self.content)
    self.n_sentences = self.count_sentences()
    self.n_words = len(self.words)
    self.n_syllables = self.count_syllables()
# Copyright (C) 2018 David Arroyo Menéndez # Author: David Arroyo Menéndez <*****@*****.**> # Maintainer: David Arroyo Menéndez <*****@*****.**> # This file is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # This file is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with GNU Emacs; see the file COPYING. If not, write to # the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301 USA, import hyphen h = hyphen.Hyphenator('en_US') print(h.syllables(u"Hammond's")) #It's just included in one syllable # [u'Ham', u"mond's"] # But if I do the same using the German dictionary h = hyphen.Hyphenator('de_CH') print(h.syllables(u"Hammond's")) print(h.syllables(u"Bismarck'sche"))
def is_polysyllabic(w, hyphenator=hyphen.Hyphenator('en_US')):
    """Return True when *w* splits into three or more syllables.

    Words longer than 30 characters are never reported as polysyllabic and
    are not passed to the hyphenator at all.

    Args:
        w: The word to examine.
        hyphenator: Object providing ``syllables(word)``. The default
            ``en_US`` hyphenator is created once, when the function is
            defined, and shared by every call that omits this argument.
    """
    # NOTE(review): the 30-character cut-off presumably protects the
    # hyphenator from overlong tokens -- confirm the intent.
    return len(w) <= 30 and len(hyphenator.syllables(unicode(w))) >= 3
def __init__(self, dataset, locale='en_US', reverse=False):
    """Create the hyphenator and build the bigram dictionary from *dataset*.

    Args:
        dataset: Input handed unchanged to the private bigram computation.
        locale: Dictionary locale given to ``hyphen.Hyphenator``.
        reverse: Flag forwarded unchanged to the private dictionary builder.
    """
    # The hyphenator is created before the bigrams are computed.
    self._hyphenator = hyphen.Hyphenator(locale)

    bigrams = self.__compute_bigrams(dataset)
    self._bigrams_dict = self.__build__bigrams_dictionary(bigrams, reverse)
def set_language(self, language):
    """Replace ``self.hyphenator`` with a hyphenator for *language*.

    Args:
        language: Language identifier passed to ``hyphen.Hyphenator``;
            the dictionary directory is ``DICTIONARIES_DIR``.
    """
    replacement = hyphen.Hyphenator(
        language=language,
        directory=DICTIONARIES_DIR,
    )
    self.hyphenator = replacement
def __init__(self, language):
    """Set up the instance with a hyphenator for *language*.

    Args:
        language: Language identifier passed to ``hyphen.Hyphenator``;
            the dictionary directory is ``DICTIONARIES_DIR``.
    """
    make_hyphenator = hyphen.Hyphenator
    self.hyphenator = make_hyphenator(language=language,
                                      directory=DICTIONARIES_DIR)
import os
from os import path
import sys, codecs
import arrow
from string import ascii_letters
import markdown, md_extensions
from PIL import Image
from jinja2 import Environment, FileSystemLoader
import yaml

# Prepare hyphen, load (or install) dictionary...
# This runs at import time: the en_US dictionary is installed if it is not
# already present, then a module-level hyphenator `h` is created.
import hyphen
import hyphen.dictools
if not hyphen.dictools.is_installed('en_US'):
    hyphen.dictools.install('en_US')
h = hyphen.Hyphenator('en_US')

# DEFAULTS

# Common screen resolutions to fit images in (defaults are all about width).
# None means auto-adapt.
# Entries prefixed with '#' are disabled values kept for reference.
# NOTE(review): the one-entry-per-line layout was reconstructed; confirm
# which values are meant to be active.
IMAGE_RESIZES = (
    480,
    #600,
    #768,
    900,
    #1080,
    #1200,
    1600,
)