def hyphenate(value, arg=None, autoescape=None):
    """Join the syllables of the long alphabetic words in *value*.

    Args:
        value: Text to process; it is split on single spaces.
        arg: Optional ``"<language-code>[,<minlen>]"`` string. The language
            code is split on ``-`` and normalised to ``xx_YY``. When ``arg``
            is empty, ``settings.LANGUAGE_CODE`` is used and the minimum
            length defaults to 5.
        autoescape: When true, ``value`` is passed through
            ``conditional_escape`` before it is processed.

    Returns:
        The processed text wrapped in ``mark_safe``.

    Raises:
        ValueError: if the minimum length given in ``arg`` is not an integer.
        IndexError: if the language code contains no ``-`` separator.
    """
    # Default first: previously ``minlen`` was only bound inside the
    # ``if arg:`` branch, so calling the filter without an argument raised
    # NameError in the loop below.
    minlen = 5
    if arg:
        args = arg.split(u',')
        code = args[0]
        if len(args) > 1:
            minlen = int(args[1])
    else:
        code = settings.LANGUAGE_CODE

    s = code.split(u'-')
    lang = s[0].lower() + u'_' + s[1].upper()
    if not dictools.is_installed(lang):
        dictools.install(lang)
    h = hyphenator(lang)

    # The result is marked safe below, so the input has to be escaped here
    # when autoescaping is active. Escaping the input (rather than the joined
    # result) leaves already-safe input untouched.
    if autoescape:
        value = conditional_escape(value)

    new = []
    for word in value.split(u' '):
        # Only purely alphabetic words longer than ``minlen`` are split.
        if len(word) > minlen and word.isalpha():
            # NOTE(review): the joiner literal is copied unchanged from the
            # original; it appears to be a soft hyphen -- confirm.
            new.append(u'­'.join(h.syllables(word)))
        else:
            new.append(word)
    return mark_safe(u' '.join(new))
def hyphenate(self):
    """Write one hyphenation report per language in ``self.languages``.

    For every language a hyphenator is created and the file
    ``output/<lang>.<self.filename>`` is written using ``self.encoding``.
    The file holds a three-line header, one ``word * first- second / ...``
    line (terminated by ``.``) for each entry of ``self.words``, and a
    closing marker line.
    """
    for lang in self.languages:
        logging.info('Hyphenating %s with dictionary %s', self.filename, lang)
        if is_installed(lang):
            h = hyphenator(lang)
        else:
            # Not installed: '.' is passed as the second argument
            # (presumably the dictionary directory -- confirm).
            h = hyphenator(lang, '.')
        output_filename = ''.join(('output/', lang, '.', self.filename))
        # ``with`` guarantees the file is closed even if ``h.pairs`` or a
        # write raises; the original leaked the handle in that case.
        with codecs.open(output_filename, 'w', self.encoding) as f:
            logging.info('Writing to output file %s...', output_filename)
            # A single string is written with write(); the original passed
            # it to writelines(), which yields the same output.
            f.write('Output file created by hyphen_test.\nFormat: word * pairs.\n==========\n')
            for w in self.words:
                pairs_str = ' / '.join(
                    ['- '.join((p[0], p[1])) for p in h.pairs(w)])
                f.write(' * '.join((w, pairs_str)))
                f.write('.\n')
            f.write('*** Word list completed. ***')
        logging.info('Finished %s.', output_filename)
def build_phonetic_database(words, output_stream): h = hyphenator() for word in words: syls = h.syllables(unicode(word)) if len(syls) == 1: continue pairs = pair_with_next(syls) print pairs for (x,y) in pairs: print >>output_stream, x, y
def hyphenate(self):
    """Produce a hyphenation report file for each configured language.

    Iterates over ``self.languages``; for each one it builds a hyphenator and
    writes ``output/<lang>.<self.filename>`` in ``self.encoding``. Each entry
    of ``self.words`` becomes a line of the form
    ``word * first- second / first- second.``, preceded by a header and
    followed by a completion marker.
    """
    for lang in self.languages:
        logging.info('Hyphenating %s with dictionary %s', self.filename, lang)
        if is_installed(lang):
            h = hyphenator(lang)
        else:
            # Fallback when the dictionary is not installed: '.' is passed as
            # the second argument (presumably a directory -- confirm).
            h = hyphenator(lang, '.')
        output_filename = ''.join(('output/', lang, '.', self.filename))
        # Context manager closes the file on every exit path; the original
        # explicit close() was skipped whenever an exception occurred.
        with codecs.open(output_filename, 'w', self.encoding) as f:
            logging.info('Writing to output file %s...', output_filename)
            # NOTE(review): the original used a backslash line continuation
            # inside this literal; it is assumed that no leading whitespace
            # preceded "Format" -- confirm.
            f.write('Output file created by hyphen_test.\n'
                    'Format: word * pairs.\n==========\n')
            for w in self.words:
                pairs_str = ' / '.join(
                    ['- '.join((p[0], p[1])) for p in h.pairs(w)])
                f.write(' * '.join((w, pairs_str)))
                f.write('.\n')
            f.write('*** Word list completed. ***')
        logging.info('Finished %s.', output_filename)
def hyphenate(value, arg=None, autoescape=None):
    """Join the syllables of long alphabetic words in *value*.

    The text is split on single spaces. A word is split into syllables only
    when it is longer than five characters and purely alphabetic; all other
    words pass through unchanged. The ``en_US`` dictionary is loaded from
    ``settings.DEFAULT_DIC_PATH``. ``arg`` and ``autoescape`` are accepted
    but not used.
    """
    min_length = 5
    splitter = hyphenator('en_US', directory=settings.DEFAULT_DIC_PATH)

    def soften(token):
        # Short or non-alphabetic tokens are returned untouched.
        if len(token) > min_length and token.isalpha():
            return u'­'.join(splitter.syllables(token))
        return token

    return u' '.join([soften(token) for token in value.split(u' ')])
def hyphenate(value, arg=None, autoescape=None):
    """Join the syllables of long alphabetic words in *value*.

    Uses the ``en_US`` dictionary from a fixed directory. Words (split on
    single spaces) longer than five characters and consisting only of
    letters are replaced by their syllables joined with the separator
    literal; every other word is kept as is. ``arg`` and ``autoescape`` are
    accepted but not used.
    """
    dictionary_dir = '/home/ford/sites/ftrain.com/htdocs/ftrain/'
    threshold = 5
    engine = hyphenator('en_US', directory=dictionary_dir)
    pieces = [
        u'­'.join(engine.syllables(piece))
        if len(piece) > threshold and piece.isalpha()
        else piece
        for piece in value.split(u' ')
    ]
    return u' '.join(pieces)
def __init__(self, language="EN", minWordLength=4, quality=8,
             hyphenDir=None, **options):
    """Set up the hyphenator and make sure its dictionary is installed.

    Args:
        language: Language identifier passed to the base class, to
            ``dictools`` and to ``pyhyphen.hyphenator``.
        minWordLength: Forwarded to ``ExplicitHyphenator.__init__``.
        quality: Stored unchanged as ``self.quality``.
        hyphenDir: Dictionary directory. Defaults to the ``dict`` directory
            that sits next to this module's parent directory.
        **options: Forwarded to ``ExplicitHyphenator.__init__``.
    """
    ExplicitHyphenator.__init__(self, language=language,
                                minWordLength=minWordLength, **options)
    if hyphenDir is None:
        hyphenDir = os.path.join(os.path.split(__file__)[0], "..", "dict")
    # The unused local ``fname`` ("hyph_<language>.dic" path) was dropped;
    # nothing ever read it.
    if not dictools.is_installed(language, directory=hyphenDir):
        dictools.install(language, directory=hyphenDir)
        # Parenthesised single argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("installed dictionary for %s into %s" % (language, hyphenDir))
    self.hnj = pyhyphen.hyphenator(language, directory=hyphenDir)
    self.quality = quality
def __init__(self, language="EN", minWordLength=4, quality=8,
             hyphenDir=None, **options):
    """Initialise the base hyphenator and create the PyHyphen backend.

    Args:
        language: Language identifier used for the base class, the
            dictionary check/installation and ``pyhyphen.hyphenator``.
        minWordLength: Passed through to ``ExplicitHyphenator.__init__``.
        quality: Kept on the instance as ``self.quality``.
        hyphenDir: Directory holding the dictionaries. When ``None``, the
            ``../dict`` directory relative to this file is used.
        **options: Passed through to ``ExplicitHyphenator.__init__``.
    """
    ExplicitHyphenator.__init__(self, language=language,
                                minWordLength=minWordLength, **options)
    if hyphenDir is None:
        # os.path.dirname(p) is defined as os.path.split(p)[0].
        hyphenDir = os.path.join(os.path.dirname(__file__), "..", "dict")
    # Removed the never-read local ``fname`` that the original computed here.
    if not dictools.is_installed(language, directory=hyphenDir):
        dictools.install(language, directory=hyphenDir)
        # Written so that it parses, and prints identically, on both
        # Python 2 and Python 3.
        print("installed dictionary for %s into %s" % (language, hyphenDir))
    self.hnj = pyhyphen.hyphenator(language, directory=hyphenDir)
    self.quality = quality
import sys
import json

# The hyphen package exposes its class as ``Hyphenator`` in some installs and
# as ``hyphenator`` in others (original note: "2.7 and up version is
# capitalized (annoying)"). Probe for the name itself rather than keying on
# the interpreter version, which only approximates which one is present.
try:
    from hyphen import Hyphenator, dictools
except ImportError:
    from hyphen import hyphenator as Hyphenator, dictools

hy = Hyphenator('en_US')


def _as_text(arg):
    """Return a command-line argument as text, decoding byte strings."""
    # On Python 2 argv entries are byte strings; ``unicode(arg)`` decoded
    # them as ASCII and crashed on anything else. On Python 3 they are
    # already text and ``unicode`` does not exist.
    if isinstance(arg, bytes):
        return arg.decode(sys.getfilesystemencoding() or 'utf-8')
    return arg


# Print a JSON object mapping each command-line word to its syllable list.
try:
    json_object = {}
    for word in sys.argv[1:]:
        json_object[word] = hy.syllables(_as_text(word))
    # Single parenthesised argument: same output on Python 2 and 3.
    print(json.dumps(json_object))
except IndexError:
    # NOTE(review): slicing argv cannot raise IndexError, so this only fires
    # if the hyphenator raises it; kept to preserve the exit code contract.
    sys.exit(1)
sys.exit(0)
#!/usr/bin/python from hyphen import hyphenator from hyphen.dictools import * from xml.sax import make_parser from xml.sax.handler import ContentHandler lang="pl_PL" if not is_installed(lang): install(lang) h_pl = hyphenator('pl_PL') class XMLHyphenator(ContentHandler): def __init__ (self): self.isPointsElement, self.isReboundsElement = 0, 0 self.outputXML=[] def startElement(self, name, attrs): self.outputXML.append("<"+name) for i in attrs.keys(): self.outputXML.append(" "+i+"=\""+attrs.get(i,"")+"\"") self.outputXML.append(">") return def endElement(self, name): self.outputXML.append("</"+name+">") return def characters (self, ch): t=ch.split(" ") for i in t: s=i if i.__len__()>5: if i[len(i)-1]==",": i=h_pl.inserted(i.replace(",",""))+"," else: