Esempio n. 1
0
def hyphenate(value, arg=None, autoescape=None):
    """Insert soft hyphens (U+00AD) into the long alphabetic words of ``value``.

    ``value`` is split on single spaces. Every word that is longer than the
    minimum length and purely alphabetic is replaced by its syllables joined
    with soft hyphens; all other words are passed through (escaped when
    ``autoescape`` is true).

    Args:
        value: Text to hyphenate.
        arg: Optional ``"<language-code>[,<min-length>]"`` string, for
            example ``"en-us,6"``. A missing language code falls back to
            ``settings.LANGUAGE_CODE``; a missing length falls back to 5.
        autoescape: When true, words that are not hyphenated are passed
            through ``conditional_escape`` before the result is marked safe.

    Returns:
        The hyphenated text wrapped with ``mark_safe``.

    Raises:
        ValueError: If the minimum length in ``arg`` is not an integer.
    """
    if autoescape:
        esc = conditional_escape
    else:
        esc = lambda x: x

    # Defaults apply whenever ``arg`` is absent or omits the matching part.
    code = settings.LANGUAGE_CODE
    minlen = 5
    if arg:
        args = arg.split(u',')
        code = args[0] or code
        if len(args) > 1:
            minlen = int(args[1])

    # Turn "en-us" into "en_US"; a code without a region ("en") is used as is
    # instead of failing on the missing second component.
    parts = code.split(u'-')
    lang = parts[0].lower()
    if len(parts) > 1:
        lang += u'_' + parts[1].upper()

    if not dictools.is_installed(lang):
        dictools.install(lang)

    h = hyphenator(lang)
    new = []
    for word in value.split(u' '):
        if len(word) > minlen and word.isalpha():
            # Purely alphabetic words contain nothing that needs escaping.
            # Keep the word itself should the hyphenator return no syllables.
            new.append(u'\u00ad'.join(h.syllables(word)) or word)
        else:
            # The result is marked safe below, so untouched words must be
            # escaped here when autoescaping is active.
            new.append(esc(word))

    return mark_safe(u' '.join(new))
Esempio n. 2
0
 def hyphenate(self):
     """Write one hyphenation report per language in ``self.languages``.

     For each language a file ``output/<lang>.<self.filename>`` is created
     with ``self.encoding``. It contains a short header, one
     ``word * pairs.`` line per entry of ``self.words`` and a closing
     marker. The file is closed even if hyphenating or writing fails.
     """
     for lang in self.languages:
         logging.info('Hyphenating %s with dictionary %s', self.filename, lang)
         if is_installed(lang):
             h = hyphenator(lang)
         else:
             # NOTE(review): '.' is presumably the directory to look for the
             # dictionary in when it is not installed -- confirm.
             h = hyphenator(lang, '.')
         output_filename = ''.join(('output/', lang, '.', self.filename))
         # The context manager guarantees the handle is released on errors.
         with codecs.open(output_filename, 'w', self.encoding) as f:
             logging.info('Writing to output file %s...', output_filename)
             f.write('Output file created by hyphen_test.\nFormat: word * pairs.\n==========\n')
             for w in self.words:
                 pairs_str = ' / '.join(
                     ['- '.join((p[0], p[1])) for p in h.pairs(w)])
                 f.write(' * '.join((w, pairs_str)))
                 f.write('.\n')
             f.write('*** Word list completed. ***')
         logging.info('Finished %s.', output_filename)
def build_phonetic_database(words, output_stream):    
    h = hyphenator()
    for word in words:
        syls = h.syllables(unicode(word))
        if len(syls) == 1: continue
        pairs = pair_with_next(syls)
        print pairs
        for (x,y) in pairs:
            print >>output_stream, x, y
Esempio n. 4
0
 def hyphenate(self):
     """Write one hyphenation report per language in ``self.languages``.

     For each language a file ``output/<lang>.<self.filename>`` is created
     with ``self.encoding``. It contains a short header, one
     ``word * pairs.`` line per entry of ``self.words`` and a closing
     marker. The file is closed even if hyphenating or writing fails.
     """
     for lang in self.languages:
         logging.info('Hyphenating %s with dictionary %s', self.filename,
                      lang)
         if is_installed(lang):
             h = hyphenator(lang)
         else:
             # NOTE(review): '.' is presumably the directory to look for the
             # dictionary in when it is not installed -- confirm.
             h = hyphenator(lang, '.')
         output_filename = ''.join(('output/', lang, '.', self.filename))
         # The context manager guarantees the handle is released on errors.
         with codecs.open(output_filename, 'w', self.encoding) as f:
             logging.info('Writing to output file %s...', output_filename)
             # Adjacent literals instead of a backslash continuation inside
             # the string, which leaked the source indentation into the file.
             f.write('Output file created by hyphen_test.\n'
                     'Format: word * pairs.\n==========\n')
             for w in self.words:
                 pairs_str = ' / '.join(
                     ['- '.join((p[0], p[1])) for p in h.pairs(w)])
                 f.write(' * '.join((w, pairs_str)))
                 f.write('.\n')
             f.write('*** Word list completed. ***')
         logging.info('Finished %s.', output_filename)
Esempio n. 5
0
def hyphenate(value, arg=None, autoescape=None):
    """Insert soft hyphens (U+00AD) into the long alphabetic words of ``value``.

    ``value`` is split on single spaces. Words longer than five characters
    that are purely alphabetic are replaced by their ``en_US`` syllables
    joined with soft hyphens; every other word is kept unchanged. The
    dictionary is looked up in ``settings.DEFAULT_DIC_PATH``.

    Args:
        value: Text to hyphenate.
        arg: Accepted for signature compatibility; not used.
        autoescape: Accepted for signature compatibility; not used.

    Returns:
        The words re-joined with single spaces.
    """
    lang = 'en_US'
    minlen = 5
    h = hyphenator(lang, directory=settings.DEFAULT_DIC_PATH)
    new = []
    for word in value.split(u' '):
        if len(word) > minlen and word.isalpha():
            # Keep the word itself should the hyphenator return no syllables.
            new.append(u'\u00ad'.join(h.syllables(word)) or word)
        else:
            new.append(word)
    # Join once after the loop: building the result inside the ``else``
    # branch lost trailing hyphenated words and left the name unbound when
    # every word was hyphenated.
    return u' '.join(new)
Esempio n. 6
0
def hyphenate(value, arg=None, autoescape=None):
    """Insert soft hyphens (U+00AD) into the long alphabetic words of ``value``.

    ``value`` is split on single spaces. Words longer than five characters
    that are purely alphabetic are replaced by their ``en_US`` syllables
    joined with soft hyphens; every other word is kept unchanged.

    Args:
        value: Text to hyphenate.
        arg: Accepted for signature compatibility; not used.
        autoescape: Accepted for signature compatibility; not used.

    Returns:
        The words re-joined with single spaces.
    """
    # Directory handed to the hyphenator for its dictionary lookup.
    default_dic_path = '/home/ford/sites/ftrain.com/htdocs/ftrain/'
    lang = 'en_US'
    minlen = 5
    h = hyphenator(lang, directory=default_dic_path)
    new = []
    for word in value.split(u' '):
        if len(word) > minlen and word.isalpha():
            # Keep the word itself should the hyphenator return no syllables.
            new.append(u'\u00ad'.join(h.syllables(word)) or word)
        else:
            new.append(word)
    # Join once after the loop: building the result inside the ``else``
    # branch lost trailing hyphenated words and left the name unbound when
    # every word was hyphenated.
    return u' '.join(new)
 def __init__(self,
              language="EN",
              minWordLength=4,
              quality=8,
              hyphenDir=None,
              **options):
     """Initialise the base hyphenator and attach a pyhyphen backend.

     Args:
         language: Dictionary language; forwarded to ``ExplicitHyphenator``
             and used to select the pyhyphen dictionary.
         minWordLength: Forwarded to ``ExplicitHyphenator``.
         quality: Stored on the instance as ``self.quality``.
         hyphenDir: Directory holding the dictionaries. Defaults to the
             ``dict`` directory next to this module's parent directory.
         **options: Extra keyword arguments for ``ExplicitHyphenator``.
     """
     ExplicitHyphenator.__init__(self,
                                 language=language,
                                 minWordLength=minWordLength,
                                 **options)
     if hyphenDir is None:
         hyphenDir = os.path.join(os.path.split(__file__)[0], "..", "dict")
     # Install the dictionary on first use so the backend can load it.
     if not dictools.is_installed(language, directory=hyphenDir):
         dictools.install(language, directory=hyphenDir)
         # Single parenthesised argument: same output on Python 2 and 3.
         print("installed dictionary for %s into %s" % (language, hyphenDir))
     self.hnj = pyhyphen.hyphenator(language, directory=hyphenDir)
     self.quality = quality
Esempio n. 8
0
 def __init__(self,
              language="EN",
              minWordLength=4,
              quality=8,
              hyphenDir=None,
              **options):
     """Initialise the base hyphenator and attach a pyhyphen backend.

     Args:
         language: Dictionary language; forwarded to ``ExplicitHyphenator``
             and used to select the pyhyphen dictionary.
         minWordLength: Forwarded to ``ExplicitHyphenator``.
         quality: Stored on the instance as ``self.quality``.
         hyphenDir: Directory holding the dictionaries. Defaults to the
             ``dict`` directory next to this module's parent directory.
         **options: Extra keyword arguments for ``ExplicitHyphenator``.
     """
     ExplicitHyphenator.__init__(self,
                                 language=language,
                                 minWordLength=minWordLength,
                                 **options)
     if hyphenDir is None:
         hyphenDir = os.path.join(os.path.split(__file__)[0], "..", "dict")
     # Install the dictionary on first use so the backend can load it.
     if not dictools.is_installed(language, directory=hyphenDir):
         dictools.install(language, directory=hyphenDir)
         # Single parenthesised argument: same output on Python 2 and 3.
         print("installed dictionary for %s into %s" % (language, hyphenDir))
     self.hnj = pyhyphen.hyphenator(language, directory=hyphenDir)
     self.quality = quality
Esempio n. 9
0
"""Print a JSON object mapping each command-line word to its syllables."""
import sys
import json

# The hyphen package exposes its class as ``Hyphenator`` in some releases and
# as ``hyphenator`` in others. Detect which one is available by importing it
# rather than guessing from the interpreter version.
try:
    from hyphen import Hyphenator, dictools
except ImportError:
    from hyphen import hyphenator as Hyphenator, dictools

# ``unicode`` only exists on Python 2; on Python 3 ``str`` is already text.
try:
    text_type = unicode
except NameError:
    text_type = str

hy = Hyphenator('en_US')

# Slicing ``sys.argv`` never raises IndexError, so no handler is needed:
# running without words simply prints an empty object.
json_object = {}
for word in sys.argv[1:]:
    json_object[word] = hy.syllables(text_type(word))
print(json.dumps(json_object))

sys.exit(0)
Esempio n. 10
0
File: hyp.py Progetto: Giotoc/ACR
#!/usr/bin/python
from hyphen import hyphenator
from hyphen.dictools import *
from xml.sax import make_parser
from xml.sax.handler import ContentHandler 
# Make sure the Polish dictionary is installed, then build the module-wide
# hyphenator from the same language code.
lang = "pl_PL"
if not is_installed(lang):
	install(lang)
h_pl = hyphenator(lang)

class XMLHyphenator(ContentHandler):
	def __init__ (self):
		self.isPointsElement, self.isReboundsElement = 0, 0
		self.outputXML=[]
	def startElement(self, name, attrs):
		self.outputXML.append("<"+name)
		for i in attrs.keys():
			self.outputXML.append(" "+i+"=\""+attrs.get(i,"")+"\"")
		self.outputXML.append(">")
		return
	def endElement(self, name):
		self.outputXML.append("</"+name+">")
		return
	def characters (self, ch):
		t=ch.split(" ")
		for i in t:
			s=i
			if i.__len__()>5:
				if i[len(i)-1]==",":
					i=h_pl.inserted(i.replace(",",""))+","
				else: