def init_run_mary(self, text):
    """Fill this object with stanza, line and word children parsed from raw text.

    Chunks of ``text`` separated by a blank line are handled as stanzas and
    every non-empty row inside a chunk as a line.  Each whitespace-separated
    token is run through ``gleanPunc2``; the word it yields is resolved to
    word objects in one of three ways:

    * ``self.dict`` already has the word  -> ``self.dict.get`` (origin 'cmu');
    * otherwise, when ``self.lang == 'en'`` -> a pronunciation is assembled
      from the ``openmary`` output and handed to ``self.dict.make``
      (origin 'openmary');
    * otherwise -> whatever ``self.dict.get`` returns, origin untouched.

    NOTE(review): the nesting of the trailing ``finish()`` calls was
    reconstructed from a whitespace-collapsed source -- confirm it matches
    the intended layout.
    """
    import lexconvert
    import bs4

    word_total = 0  # incremented per word; never read inside this method
    cur_stanza = self.newchild()
    cur_line = cur_stanza.newchild()

    for stanza_chunk in text.split('\n\n'):
        stanza_chunk = stanza_chunk.strip()
        if not stanza_chunk:
            continue

        for row in stanza_chunk.split('\n'):
            row = row.strip()
            if not row:
                continue

            for token in row.split():
                # presumably (leading punctuation, bare word, trailing
                # punctuation) -- verify against gleanPunc2
                p0, word, p1 = gleanPunc2(token)
                if p0 and not cur_line.empty():
                    cur_line.finish()
                if not word:
                    continue

                # Open fresh containers when the previous ones were closed.
                if cur_stanza.finished:
                    cur_stanza = self.newchild()
                if cur_line.finished:
                    cur_line = cur_stanza.newchild()

                if self.dict.has(word):
                    entries = self.dict.get(word)
                    origin = 'cmu'
                elif self.lang == 'en':
                    # Build the word from the openmary markup.
                    markup = bs4.BeautifulSoup(openmary(word))
                    sylls = []
                    for syll_tag in markup.find_all('syllable'):
                        stress_mark = "'" if syll_tag.get('stress', None) else ""
                        phones = [sampa2ipa(ph) for ph in syll_tag['ph'].split()]
                        sylls.append(stress_mark + ''.join(phones))

                    from Phoneme import Phoneme
                    if len(sylls) > 1:
                        # A first syllable without any vowel is glued onto
                        # the syllable that follows it.
                        vowel_flags = [Phoneme(ch).isVowel() for ch in sylls[0]]
                        if True not in vowel_flags:
                            sylls = [sylls[0] + sylls[1]] + sylls[2:]

                    entries = [self.dict.make(('.'.join(sylls), []), word)]
                    origin = 'openmary'
                else:
                    entries = self.dict.get(word)
                    origin = None

                if origin is not None:
                    for entry in entries:
                        entry.origin = origin

                cur_line.newchild(entries)
                # Unless breaking strictly on rows, trailing punctuation
                # also closes the current line.
                if self.phrasebreak != 'line' and p1 and not cur_line.empty():
                    cur_line.finish()
                word_total += 1

            if not cur_line.empty():
                cur_line.finish()

        if not cur_line.empty():
            cur_line.finish()
        if not cur_stanza.empty():
            cur_stanza.finish()
def init_mary(self, xml):
    """Fill this object with stanza, line and word children from MARY-style XML.

    ``xml`` is parsed with BeautifulSoup and walked as ``<p>`` -> ``<phrase>``
    -> ``<t>``.  A ``<t>`` element without a ``ph`` attribute is skipped.
    Words come from ``self.dict`` when it has the token and ``self.use_dict``
    is truthy (origin 'cmu'); otherwise a pronunciation is assembled from the
    element's ``<syllable>``/``<ph>`` children and passed to
    ``self.dict.make`` (origin 'openmary').

    NOTE(review): the nesting of the trailing ``finish()`` calls was
    reconstructed from a whitespace-collapsed source -- confirm it matches
    the intended layout.
    """
    import lexconvert
    import bs4

    soup = bs4.BeautifulSoup(xml)
    word_total = 0  # incremented per word; never read inside this method
    cur_stanza = self.newchild()
    cur_line = cur_stanza.newchild()

    for para in soup.find_all('p'):
        for phrase in para.find_all('phrase'):
            for token_tag in phrase.find_all('t'):
                # Open fresh containers when the previous ones were closed.
                if cur_stanza.finished:
                    cur_stanza = self.newchild()
                if cur_line.finished:
                    cur_line = cur_stanza.newchild()

                token_text = token_tag['token']
                if not token_tag.get('ph', None):
                    continue

                if self.dict.has(token_text) and self.use_dict:
                    entries = self.dict.get(token_text)
                    origin = 'cmu'
                else:
                    # Build the word from the openmary markup.
                    sylls = []
                    for syll_tag in token_tag.find_all('syllable'):
                        stress_mark = "'" if syll_tag.get('stress', None) else ""
                        phones = [sampa2ipa(ph_tag['p']) for ph_tag in syll_tag('ph')]
                        sylls.append(stress_mark + ''.join(phones))

                    from Phoneme import Phoneme
                    # A lone 'ʃ' first syllable is glued onto the next one.
                    if len(sylls) > 1 and sylls[0] == u'ʃ':
                        sylls = [sylls[0] + sylls[1]] + sylls[2:]

                    entries = [self.dict.make(('.'.join(sylls), []), token_text)]
                    origin = 'openmary'

                for entry in entries:
                    entry.origin = origin

                cur_line.newchild(entries)
                word_total += 1

            # Each phrase closes the line it contributed to.
            if not cur_line.empty():
                cur_line.finish()

        if not cur_line.empty():
            cur_line.finish()
        if not cur_stanza.empty():
            cur_stanza.finish()
def init_mary(self, xml):
    """Create stanza/line/word children out of a MARY-style XML document.

    The document is parsed with BeautifulSoup; every ``<t>`` element found
    under ``<p>``/``<phrase>`` that carries a ``ph`` attribute becomes one
    word child of the current line.  The word objects are taken from
    ``self.dict`` (origin 'cmu') when the token is known and
    ``self.use_dict`` is truthy, and are otherwise made from the element's
    syllable markup via ``self.dict.make`` (origin 'openmary').

    NOTE(review): the nesting of the trailing ``finish()`` calls was
    reconstructed from a whitespace-collapsed source -- confirm it matches
    the intended layout.
    """
    import lexconvert
    import bs4

    def _ipa_syllable(syll_tag):
        # Optional stress mark followed by the IPA form of every <ph> child.
        pieces = ["'" if syll_tag.get('stress', None) else ""]
        for ph_tag in syll_tag('ph'):
            pieces.append(sampa2ipa(ph_tag['p']))
        return ''.join(pieces)

    document = bs4.BeautifulSoup(xml)
    seen_words = 0  # incremented per word; never read inside this method
    stanza_node = self.newchild()
    line_node = stanza_node.newchild()

    for paragraph in document.find_all('p'):
        for phrase_tag in paragraph.find_all('phrase'):
            for t_tag in phrase_tag.find_all('t'):
                # Replace containers that were already finished.
                if stanza_node.finished:
                    stanza_node = self.newchild()
                if line_node.finished:
                    line_node = stanza_node.newchild()

                surface = t_tag['token']
                if not t_tag.get('ph', None):
                    continue

                if self.dict.has(surface) and self.use_dict:
                    found = self.dict.get(surface)
                    for item in found:
                        item.origin = 'cmu'
                else:
                    # Make the word from the openmary markup.
                    syllables = [_ipa_syllable(s) for s in t_tag.find_all('syllable')]

                    from Phoneme import Phoneme
                    # Merge a first syllable consisting only of 'ʃ' into
                    # the syllable after it.
                    if len(syllables) > 1 and syllables[0] == u'ʃ':
                        syllables = [syllables[0] + syllables[1]] + syllables[2:]

                    pronunciation = '.'.join(syllables)
                    found = [self.dict.make((pronunciation, []), surface)]
                    for item in found:
                        item.origin = 'openmary'

                line_node.newchild(found)
                seen_words += 1

            if not line_node.empty():
                line_node.finish()

        if not line_node.empty():
            line_node.finish()
        if not stanza_node.empty():
            stanza_node.finish()
def openmary2ipa(word):
    """Return a dot-separated IPA syllabification of ``word`` built from ``openmary``.

    Every ``<syllable>`` element in the ``openmary`` result becomes one
    syllable string: an apostrophe when the element has a truthy ``stress``
    attribute, followed by the ``sampa2ipa`` form of each space-separated
    entry of its ``ph`` attribute.  If the first syllable has no vowel it is
    merged into the second one before joining.
    """
    markup = openmary(word)

    syllables = []
    for syll_tag in markup.find_all('syllable'):
        stress_mark = "'" if syll_tag.get('stress', None) else ""
        phones = [sampa2ipa(ph) for ph in syll_tag['ph'].split()]
        syllables.append(stress_mark + ''.join(phones))

    from Phoneme import Phoneme
    if len(syllables) > 1:
        # One vowel test per character of the first syllable.
        vowel_flags = [Phoneme(ch).isVowel() for ch in syllables[0]]
        if True not in vowel_flags:
            syllables = [syllables[0] + syllables[1]] + syllables[2:]

    return '.'.join(syllables)
def openmary2ipa(word):
    """Return a dot-separated IPA syllabification of ``word`` built from ``openmary``.

    Returns ``None`` when the ``openmary`` call raises
    ``urllib.error.URLError``.  Otherwise every ``<syllable>`` element of the
    result becomes one syllable string: an apostrophe when the element has a
    truthy ``stress`` attribute, followed by the ``sampa2ipa`` form of each
    space-separated entry of its ``ph`` attribute.  If the first syllable has
    no vowel it is merged into the second one before joining.
    """
    import urllib.request
    import urllib.error
    import urllib.parse

    try:
        markup = openmary(word)
    except urllib.error.URLError:
        return None

    syllables = []
    for syll_tag in markup.find_all('syllable'):
        stress_mark = "'" if syll_tag.get('stress', None) else ""
        phones = [sampa2ipa(ph) for ph in syll_tag['ph'].split()]
        syllables.append(stress_mark + ''.join(phones))

    from Phoneme import Phoneme
    if len(syllables) > 1:
        # One vowel test per character of the first syllable.
        vowel_flags = [Phoneme(ch).isVowel() for ch in syllables[0]]
        if True not in vowel_flags:
            syllables = [syllables[0] + syllables[1]] + syllables[2:]

    return '.'.join(syllables)
def init_run_mary(self, text):
    """Create stanza/line/word children from plain text.

    ``text`` is cut into stanzas on blank lines and into lines on newlines;
    empty pieces are ignored.  Each whitespace-separated token goes through
    ``gleanPunc2`` and the resulting word is looked up in ``self.dict``
    (origin 'cmu').  Words the dictionary lacks are, for
    ``self.lang == 'en'``, built from the ``openmary`` output with
    ``self.dict.make`` (origin 'openmary'); for any other language the
    result of ``self.dict.get`` is used as-is.

    NOTE(review): the nesting of the trailing ``finish()`` calls was
    reconstructed from a whitespace-collapsed source -- confirm it matches
    the intended layout.
    """
    import lexconvert
    import bs4

    def _ipa_syllable(syll_tag):
        # Optional stress mark followed by the IPA form of each 'ph' entry.
        pieces = ["'" if syll_tag.get('stress', None) else ""]
        for sampa in syll_tag['ph'].split():
            pieces.append(sampa2ipa(sampa))
        return ''.join(pieces)

    seen_words = 0  # incremented per word; never read inside this method
    stanza_node = self.newchild()
    line_node = stanza_node.newchild()

    raw_stanzas = (chunk.strip() for chunk in text.split('\n\n'))
    for stanza_src in raw_stanzas:
        if not stanza_src:
            continue

        raw_lines = (row.strip() for row in stanza_src.split('\n'))
        for line_src in raw_lines:
            if not line_src:
                continue

            for raw_token in line_src.split():
                # presumably (leading punctuation, bare word, trailing
                # punctuation) -- verify against gleanPunc2
                p0, word, p1 = gleanPunc2(raw_token)
                if p0 and not line_node.empty():
                    line_node.finish()
                if not word:
                    continue

                # Replace containers that were already finished.
                if stanza_node.finished:
                    stanza_node = self.newchild()
                if line_node.finished:
                    line_node = stanza_node.newchild()

                if self.dict.has(word):
                    found = self.dict.get(word)
                    for item in found:
                        item.origin = 'cmu'
                elif self.lang == 'en':
                    # Make the word from the openmary markup.
                    parsed = bs4.BeautifulSoup(openmary(word))
                    syllables = [_ipa_syllable(s) for s in parsed.find_all('syllable')]

                    from Phoneme import Phoneme
                    if len(syllables) > 1:
                        # A vowel-less first syllable is merged into the
                        # syllable after it.
                        flags = [Phoneme(ch).isVowel() for ch in syllables[0]]
                        if True not in flags:
                            syllables = [syllables[0] + syllables[1]] + syllables[2:]

                    pronunciation = '.'.join(syllables)
                    found = [self.dict.make((pronunciation, []), word)]
                    for item in found:
                        item.origin = 'openmary'
                else:
                    found = self.dict.get(word)

                line_node.newchild(found)
                if self.phrasebreak != 'line':
                    # Trailing punctuation closes the current line too.
                    if p1 and not line_node.empty():
                        line_node.finish()
                seen_words += 1

            if not line_node.empty():
                line_node.finish()

        if not line_node.empty():
            line_node.finish()
        if not stanza_node.empty():
            stanza_node.finish()