Esempio n. 1
0
    def test_fill(self):
        hyphenator = hyphen.Hyphenator("en_US")
        wrapped = textwrap2.fill("A thing of beauty is a joy forever.",
                                 width=10,
                                 use_hyphenator=hyphenator)
        # Note that this wrapping depends on the content of the language dictionary
        self.assertEqual("""A thing of
beauty is
a joy for-
ever.""", wrapped)
Esempio n. 2
0
    def __init__(self, filename, lang):
        """Read a text file and store its sentence, word and syllable counts.

        Args:
            filename: Path of the text file to read.
            lang: Language code of the text; either 'en' or 'de'.

        Raises:
            ValueError: If ``lang`` is neither 'en' nor 'de'.
        """
        # Pick the hyphenation dictionary matching the requested language.
        if lang == 'de':
            self.hyphenation = hyphen.Hyphenator('de_DE')
        elif lang == 'en':
            self.hyphenation = hyphen.Hyphenator('en_US')
        else:
            # ValueError (a subclass of Exception) pinpoints the bad argument
            # while staying catchable by existing ``except Exception`` code.
            raise ValueError(
                'Requested language not supported (Supported: \'en\', \'de\').'
            )

        self.lang = lang

        # NOTE(review): the file is decoded with the platform default
        # encoding; confirm that matches how the input files are stored.
        with open(filename, 'r') as content_file:
            self.content = content_file.read()

        # A word is a run of ASCII letters, German umlauts or eszett.
        self.words = re.findall(r'[a-zA-ZäöüÄÖÜß]+', self.content)

        # The counters are filled in only after content/words/hyphenation
        # have been set, in the same order as before.
        self.n_sentences = self.count_sentences()
        self.n_words = len(self.words)
        self.n_syllables = self.count_syllables()
# Copyright (C) 2018  David Arroyo Menéndez

# Author: David Arroyo Menéndez <*****@*****.**>
# Maintainer: David Arroyo Menéndez <*****@*****.**>

# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with GNU Emacs; see the file COPYING.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA,

import hyphen
# Split an English possessive with the en_US dictionary.
h = hyphen.Hyphenator('en_US')
english_result = h.syllables(u"Hammond's")
print(english_result)
# The apostrophe-s is simply kept inside the last syllable:
# [u'Ham', u"mond's"]

# Now do the same with the de_CH German dictionary, adding a German
# adjective form for comparison.
h = hyphen.Hyphenator('de_CH')
for word in (u"Hammond's", u"Bismarck'sche"):
    print(h.syllables(word))
Esempio n. 4
0
def is_polysyllabic(w, hyphenator=None):
    """Return True when ``w`` splits into three or more syllables.

    Args:
        w: The word to test.
        hyphenator: Object with a ``syllables(word)`` method returning the
            word's syllables. When omitted, a shared ``en_US``
            ``hyphen.Hyphenator`` is created on first use and reused.

    Returns:
        False for words longer than 30 characters, otherwise whether the
        hyphenator reports at least three syllables.
    """
    # Very long words are never counted as polysyllabic (presumably to avoid
    # feeding non-words to the hyphenator -- confirm the intent).
    if len(w) > 30:
        return False

    if hyphenator is None:
        # Build the default lazily so importing this module does not load the
        # dictionary, then cache it so it is only loaded once.
        hyphenator = getattr(is_polysyllabic, '_default_hyphenator', None)
        if hyphenator is None:
            hyphenator = hyphen.Hyphenator('en_US')
            is_polysyllabic._default_hyphenator = hyphenator

    # ``unicode`` only exists on Python 2; on Python 3 ``str`` is the text type.
    try:
        to_text = unicode  # noqa: F821
    except NameError:
        to_text = str

    return len(hyphenator.syllables(to_text(w))) >= 3
Esempio n. 5
0
 def __init__(self, dataset, locale='en_US', reverse=False):
     """Create the hyphenator for ``locale`` and index the dataset's bigrams.

     The bigrams computed from ``dataset`` are turned into the dictionary
     stored on ``self._bigrams_dict``; ``reverse`` is forwarded unchanged
     to the dictionary builder.
     """
     self._hyphenator = hyphen.Hyphenator(locale)
     bigrams = self.__compute_bigrams(dataset)
     self._bigrams_dict = self.__build__bigrams_dictionary(bigrams, reverse)
Esempio n. 6
0
 def set_language(self, language):
     """Replace ``self.hyphenator`` with one built for ``language``.

     Dictionaries are looked up in ``DICTIONARIES_DIR``.
     """
     replacement = hyphen.Hyphenator(
         language=language,
         directory=DICTIONARIES_DIR,
     )
     self.hyphenator = replacement
Esempio n. 7
0
 def __init__(self, language):
     """Set up ``self.hyphenator`` for ``language``.

     The hyphenator loads its dictionaries from ``DICTIONARIES_DIR``.
     """
     options = {'language': language, 'directory': DICTIONARIES_DIR}
     self.hyphenator = hyphen.Hyphenator(**options)
Esempio n. 8
0
import os
from os import path
import sys, codecs
import arrow
from string import ascii_letters

import markdown, md_extensions
from PIL import Image
from jinja2 import Environment, FileSystemLoader
import yaml

# Prepare hyphen, load (or install) dictionary...
import hyphen
import hyphen.dictools
if not hyphen.dictools.is_installed('en_US'):
    # Dictionary is missing: install it before the Hyphenator is created.
    hyphen.dictools.install('en_US')
h = hyphen.Hyphenator('en_US')

# DEFAULTS
# Target sizes for fitting images to common screen resolutions. The default
# entries all refer to width (presumably in pixels -- confirm against the
# resizing code). None means auto-adapt.
# Commented-out entries are additional sizes that are currently disabled;
# uncomment one to enable it.
IMAGE_RESIZES = (
    480,
    #600,
    #768,
    900,
    #1080,
    #1200,
    1600,
)