def test_beautiful(self):
    """Check pairs(), wrap() and syllables() for 'beautiful' with en_US."""
    word = 'beautiful'
    hyphenator = Hyphenator('en_US')

    # Every possible split of the word into two parts.
    expected_pairs = [['beau', 'tiful'], ['beauti', 'ful']]
    self.assertEqual(expected_pairs, hyphenator.pairs(word))

    # Wrapping at a given width; the first part carries the hyphen.
    wrap_cases = (
        (6, ['beau-', 'tiful']),
        (7, ['beauti-', 'ful']),
    )
    for width, expected_parts in wrap_cases:
        self.assertEqual(expected_parts, hyphenator.wrap(word, width))

    # Full syllable decomposition.
    expected_syllables = ['beau', 'ti', 'ful']
    self.assertEqual(expected_syllables, hyphenator.syllables(word))
from hyphen import Hyphenator

# Build one hyphenator per language dictionary (German, US English, Spanish).
h_de, h_en, h_es = (
    Hyphenator(language) for language in ('de_DE', 'en_US', 'es_ES')
)

# Hyphenate a sample word.
# NOTE: these examples use Python 3.x syntax. Under Python 2.x the string
# literals need a 'u' prefix, because Hyphenator methods expect unicode
# strings.
print(h_en.pairs('beautiful'))      # [['beau', 'tiful'], ['beauti', 'ful']]
print(h_en.wrap('beautiful', 6))    # ['beau-', 'tiful']
print(h_en.wrap('beautiful', 7))    # ['beauti-', 'ful']
print(h_en.syllables('beautiful'))  # ['beau', 'ti', 'ful']

# textwrap2's fill() is handed a hyphenator through ``use_hyphenator``.
from textwrap2 import fill

print(fill('very long text...', use_hyphenator=h_en, width=40))
class Paragraph(Text):
    """Text whose element chain can be broken into lines padded to a margin.

    Instance attributes set by ``__init__``:
        margin: position that every broken line is padded out to.
        indent: number of spaces written into the header element.
        hyphenator: ``Hyphenator`` for ``self.lang``, or ``None`` when that
            language is not listed in ``dict_info``.
        hyphen_char: string emitted at the end of a line where a word is split.
    """

    def __init__(self, text, margin, indent, lang="en_US", hyphen_char="\u2010"):
        """Store the layout settings and pick a hyphenator for the language.

        Args:
            text: passed through to ``Text.__init__``.
            margin: target line width used by ``justify``.
            indent: count of spaces placed in ``self.header.c``.
            lang: language code, passed through to ``Text.__init__``.
            hyphen_char: hyphen string used when a word is split.
        """
        Text.__init__(self, text, lang)
        self.margin = margin
        self.indent = indent
        # NOTE(review): self.lang and self.header are presumably set by
        # Text.__init__ -- confirm against the base class.
        if self.lang in dict_info.keys():
            self.hyphenator = Hyphenator(self.lang)
        else:
            # No dictionary for this language: justify() then never hyphenates.
            self.hyphenator = None
        self.hyphen_char = hyphen_char
        self.header.c = " " * self.indent

    def justify(self):
        """Break the element chain into lines and pad each one to the margin.

        For every line, the word that does not fit is hyphenated when the
        hyphenator can split it inside the remaining space; otherwise the
        line is ended at the last break point. Spaces on the finished line
        are then widened until the line break starts at ``self.margin``.
        The chain is modified in place and nothing is returned. The last
        line (no element after its final break point) is left unpadded.
        """
        e = self.header
        while e.next:
            # Advance while the following element still ends within the margin.
            while e.next and e.next.very_end() <= self.margin:
                e = e.next
            # Step to the last break point before the margin.
            e = e.next_break()
            remaining_space = self.margin - e.end()
            if not e.next:
                # Nothing follows the break point: this was the final line.
                break

            parts = self._hyphenate(e.next.c, remaining_space)
            if parts:
                j, e = self._split_across_lines(e, parts)
            else:
                e.newlineize()
                # Remember the line break so the line can be padded below,
                # then continue the outer loop from the element after it.
                j = e
                e = e.next

            self._stretch_line(j)

    def _hyphenate(self, word, width):
        """Return the hyphenator's split of ``word`` for ``width``, or ``[]``."""
        if self.hyphenator is None:
            # Previously this raised AttributeError for languages without a
            # dictionary; such text is now simply broken at whole words.
            return []
        # A falsy result is treated by the caller as "cannot be split".
        return self.hyphenator.wrap(word, width, hyphen=self.hyphen_char)

    def _split_across_lines(self, e, parts):
        """Replace the word after ``e`` by head, hyphen, newline and tail.

        Args:
            e: element directly before the word being hyphenated.
            parts: two-item result of ``_hyphenate`` (head with hyphen, tail).

        Returns:
            ``(line_break, after_word)``: the inserted newline element and
            the element that originally followed the hyphenated word.
        """
        # Keep whatever followed the word so it can be re-attached.
        after_word = e.next.next

        # The head arrives with the hyphen attached; strip it because the
        # hyphen becomes its own element. Guarding on a non-empty hyphen
        # avoids ``s[:-0]``, which would yield an empty string.
        head_text = parts[0]
        if self.hyphen_char and head_text.endswith(self.hyphen_char):
            head_text = head_text[: -len(self.hyphen_char)]

        head = Word(head_text)
        e.next = head
        head.link(e)

        hyphen = Punctation(self.hyphen_char)
        head.next = hyphen
        hyphen.link(head)

        line_break = Break("\n")
        hyphen.next = line_break
        line_break.link(hyphen)

        # The second part of the word opens the new line.
        tail = Word(parts[1])
        line_break.next = tail
        tail.link(line_break)

        # NOTE(review): if the hyphenated word was the last element,
        # after_word is None and this call fails, as it did before --
        # confirm whether the chain always has a trailing element.
        after_word.link(tail)
        return line_break, after_word

    def _stretch_line(self, j):
        """Widen spaces on the line ending at ``j`` until it reaches the margin.

        One space character is added per step to a randomly chosen space
        among the currently shortest ones, preferring those next to
        punctuation. A line with no stretchable space is left unchanged.
        """
        # Walk backwards from the line break, collecting space breaks.
        spaces = []
        c_e = j
        while c_e.prev != j.line_start():
            if isinstance(c_e, Break) and c_e.space():
                spaces.append(c_e)
            c_e = c_e.prev

        if not spaces:
            # Nothing can be stretched (e.g. a single long word); the old
            # code crashed here when sampling from an empty population.
            return

        while j.start() < self.margin:
            shortest = min(space.length() for space in spaces)
            narrowest = [s for s in spaces if s.length() == shortest]
            preferred = [s for s in narrowest if s.next_to_punctation()]
            # random.choice on a list replaces random.sample on a set, which
            # raises TypeError on Python 3.11 and later.
            chosen = random.choice(preferred or narrowest)
            chosen.c += " "