Example #1
0
	def init_run_mary(self,text):
		"""Build stanza, line and word children from raw text.

		The text is cut into stanzas at blank lines, into lines at newlines
		and into tokens at whitespace. Each token goes through gleanPunc2,
		which yields (p0, word, p1) -- presumably leading punctuation, the
		bare word and trailing punctuation (TODO confirm against gleanPunc2).

		Word objects come from one of three places:
		  * self.dict.get(word) when the dictionary has the word; these are
		    tagged with origin 'cmu';
		  * an OpenMary lookup when self.lang is 'en'; tagged 'openmary';
		  * self.dict.get(word) otherwise, with no origin tag.

		A truthy p0 closes the current non-empty line before the word is
		added. A truthy p1 closes it after the word, unless
		self.phrasebreak is 'line'. The line is also closed at the end of
		every text line, and the stanza at the end of every stanza.

		Returns None; the result is the tree of children hung off self.
		"""
		import bs4

		def words_from_mary(token):
			# Turn OpenMary's syllable markup for one token into a single
			# dotted IPA pronunciation and wrap it in a dictionary word.
			# NOTE(review): no parser is named, so bs4 picks whichever
			# parser it finds installed -- confirm this is intended.
			soup=bs4.BeautifulSoup(openmary(token))
			sylls=[]
			for syll in soup.find_all('syllable'):
				# a syllable with a truthy 'stress' attribute gets a leading "'"
				stress="'" if syll.get('stress',None) else ""
				sylls.append(stress+''.join(sampa2ipa(ph) for ph in syll['ph'].split()))

			from Phoneme import Phoneme
			# If no character of the first syllable tests as a vowel,
			# glue that syllable onto the second one.
			if len(sylls)>1 and True not in [Phoneme(phon).isVowel() for phon in sylls[0]]:
				sylls=[sylls[0]+sylls[1]]+sylls[2:]

			made=[ self.dict.make(('.'.join(sylls),[]), token) ]
			for w in made: w.origin='openmary'
			return made

		stanza=self.newchild()
		line=stanza.newchild()

		for stanzatext in text.split('\n\n'):
			stanzatext=stanzatext.strip()
			if not stanzatext: continue

			for linetext in stanzatext.split('\n'):
				linetext=linetext.strip()
				if not linetext: continue

				for token in linetext.split():
					p0,word,p1=gleanPunc2(token)
					if p0 and not line.empty(): line.finish()
					if not word: continue

					# Replace finished containers only once a real word arrives.
					if stanza.finished: stanza = self.newchild()
					if line.finished: line = stanza.newchild()

					if self.dict.has(word):
						words=self.dict.get(word)
						for w in words: w.origin='cmu'
					elif self.lang=='en':
						words=words_from_mary(word)
					else:
						words=self.dict.get(word)

					line.newchild(words)
					if self.phrasebreak!='line' and p1 and not line.empty(): line.finish()

				if not line.empty(): line.finish()
			# Kept as in the original call sequence: the line is offered
			# for finishing once more before the stanza is closed.
			if not line.empty(): line.finish()
			if not stanza.empty(): stanza.finish()
Example #2
0
    def init_mary(self, xml):
        """Build stanza, line and word children from OpenMary XML markup.

        The markup is parsed with BeautifulSoup and walked as 'p' elements
        containing 'phrase' elements containing 't' token elements. The
        word string is the token's 'token' attribute; a token whose 'ph'
        attribute is missing or empty is skipped.

        Word objects come from self.dict.get() when the dictionary has the
        word and self.use_dict is truthy (origin 'cmu'). Otherwise a
        pronunciation is rebuilt from the token's 'syllable' elements and
        passed to self.dict.make() (origin 'openmary'). Each syllable
        becomes a leading "'" when its 'stress' attribute is truthy,
        followed by the sampa2ipa conversion of the 'p' attribute of each
        'ph' child; syllables are joined with '.'.

        The current line is closed after every phrase, and the line and
        stanza are closed after every paragraph.

        Returns None; the result is the tree of children hung off self.
        """
        import bs4
        # NOTE(review): no parser is named, so bs4 picks whichever parser
        # it finds installed -- confirm this is intended.
        soup = bs4.BeautifulSoup(xml)
        stanza = self.newchild()
        line = stanza.newchild()

        for para in soup.find_all('p'):
            for phrase in para.find_all('phrase'):

                for token in phrase.find_all('t'):
                    # Finished containers are replaced before the token is
                    # examined, even if the token ends up being skipped.
                    if stanza.finished: stanza = self.newchild()
                    if line.finished: line = stanza.newchild()
                    wordstr = token['token']
                    if not token.get('ph', None): continue

                    if self.dict.has(wordstr) and self.use_dict:
                        words = self.dict.get(wordstr)
                        for w in words:
                            w.origin = 'cmu'
                    else:
                        ## make word from openmary
                        sylls = []
                        for syll in token.find_all('syllable'):
                            stress = "'" if syll.get('stress', None) else ""
                            # calling a tag is bs4 shorthand for find_all()
                            sylls.append(stress + ''.join(
                                sampa2ipa(ph['p']) for ph in syll('ph')))

                        # Run once, after all syllables are collected: a
                        # first syllable that is only this one consonant
                        # is glued onto the second syllable.
                        if len(sylls) > 1 and sylls[0] == u'ʃ':
                            sylls = [sylls[0] + sylls[1]] + sylls[2:]

                        pronounc = '.'.join(sylls)
                        words = [self.dict.make((pronounc, []), wordstr)]
                        for w in words:
                            w.origin = 'openmary'

                    line.newchild(words)
                if not line.empty(): line.finish()
            if not line.empty(): line.finish()
            if not stanza.empty(): stanza.finish()
Example #3
0
	def init_mary(self,xml):
		"""Build stanza, line and word children from OpenMary XML markup.

		The markup is parsed with BeautifulSoup and walked as 'p' elements
		containing 'phrase' elements containing 't' token elements. The
		word string is the token's 'token' attribute; a token whose 'ph'
		attribute is missing or empty is skipped.

		Word objects come from self.dict.get() when the dictionary has the
		word and self.use_dict is truthy (origin 'cmu'). Otherwise a
		pronunciation is rebuilt from the token's 'syllable' elements and
		passed to self.dict.make() (origin 'openmary'). Each syllable
		becomes a leading "'" when its 'stress' attribute is truthy,
		followed by the sampa2ipa conversion of the 'p' attribute of each
		'ph' child; syllables are joined with '.'.

		The current line is closed after every phrase, and the line and
		stanza are closed after every paragraph.

		Returns None; the result is the tree of children hung off self.
		"""
		import bs4
		# NOTE(review): no parser is named, so bs4 picks whichever parser
		# it finds installed -- confirm this is intended.
		soup=bs4.BeautifulSoup(xml)
		stanza=self.newchild()
		line=stanza.newchild()

		for para in soup.find_all('p'):
			for phrase in para.find_all('phrase'):

				for token in phrase.find_all('t'):
					# Finished containers are replaced before the token is
					# examined, even if the token ends up being skipped.
					if stanza.finished: stanza = self.newchild()
					if line.finished: line = stanza.newchild()
					wordstr=token['token']
					if not token.get('ph',None): continue

					if self.dict.has(wordstr) and self.use_dict:
						words=self.dict.get(wordstr)
						for w in words: w.origin='cmu'
					else:
						## make word from openmary
						sylls=[]
						for syll in token.find_all('syllable'):
							stress="'" if syll.get('stress',None) else ""
							# calling a tag is bs4 shorthand for find_all()
							sylls.append(stress+''.join(sampa2ipa(ph['p']) for ph in syll('ph')))

						# Run once, after all syllables are collected: a
						# first syllable that is only this one consonant
						# is glued onto the second syllable.
						if len(sylls)>1 and sylls[0]==u'ʃ':
							sylls=[sylls[0]+sylls[1]]+sylls[2:]

						pronounc='.'.join(sylls)
						words=[ self.dict.make((pronounc,[]), wordstr) ]
						for w in words: w.origin='openmary'

					line.newchild(words)
				if not line.empty(): line.finish()
			if not line.empty(): line.finish()
			if not stanza.empty(): stanza.finish()
Example #4
0
def openmary2ipa(word):
	"""Return a dotted IPA pronunciation for `word` built from OpenMary output.

	openmary(word) must hand back an object offering find_all (TODO
	confirm: looks like a parsed BeautifulSoup document). Every 'syllable'
	element contributes one string: a leading "'" when its 'stress'
	attribute is truthy, followed by the sampa2ipa conversion of each
	whitespace-separated symbol in its 'ph' attribute. The syllable
	strings are joined with '.' and returned.
	"""
	syllables=[]
	for node in openmary(word).find_all('syllable'):
		mark="'" if node.get('stress',None) else ""
		syllables.append(mark+''.join(sampa2ipa(sym) for sym in node['ph'].split()))

	from Phoneme import Phoneme
	if len(syllables)>1:
		# A first syllable in which no character tests as a vowel is
		# folded into the syllable that follows it.
		vowel_flags=[Phoneme(ch).isVowel() for ch in syllables[0]]
		if True not in vowel_flags:
			syllables[:2]=[syllables[0]+syllables[1]]

	return '.'.join(syllables)
Example #5
0
def openmary2ipa(word):
	"""Return a dotted IPA pronunciation for `word` built from OpenMary output.

	openmary(word) must hand back an object offering find_all (TODO
	confirm: looks like a parsed BeautifulSoup document). Every 'syllable'
	element contributes one string: a leading "'" when its 'stress'
	attribute is truthy, followed by the sampa2ipa conversion of each
	whitespace-separated symbol in its 'ph' attribute. The syllable
	strings are joined with '.'.

	Returns the joined string, or None when openmary(word) raises
	urllib.error.URLError.
	"""
	import urllib.error
	try:
		wordxml=openmary(word)
	except urllib.error.URLError:
		# Best effort: the caller gets None instead of the exception.
		return None

	sylls=[]
	for syll in wordxml.find_all('syllable'):
		stress="'" if syll.get('stress',None) else ""
		sylls.append(stress+''.join(sampa2ipa(ph) for ph in syll['ph'].split()))

	from Phoneme import Phoneme
	# If no character of the first syllable tests as a vowel, glue that
	# syllable onto the second one.
	if len(sylls)>1 and True not in [Phoneme(phon).isVowel() for phon in sylls[0]]:
		sylls=[sylls[0]+sylls[1]]+sylls[2:]

	return '.'.join(sylls)
Example #6
0
    def init_run_mary(self, text):
        """Build stanza, line and word children from raw text.

        The text is cut into stanzas at blank lines, into lines at newlines
        and into tokens at whitespace. Each token goes through gleanPunc2,
        which yields (p0, word, p1) -- presumably leading punctuation, the
        bare word and trailing punctuation (TODO confirm against
        gleanPunc2).

        Word objects come from one of three places:
          * self.dict.get(word) when the dictionary has the word; these
            are tagged with origin 'cmu';
          * an OpenMary lookup when self.lang is 'en'; tagged 'openmary';
          * self.dict.get(word) otherwise, with no origin tag.

        A truthy p0 closes the current non-empty line before the word is
        added. A truthy p1 closes it after the word, unless
        self.phrasebreak is 'line'. The line is also closed at the end of
        every text line, and the stanza at the end of every stanza.

        Returns None; the result is the tree of children hung off self.
        """
        import bs4

        def words_from_mary(token):
            # Turn OpenMary's syllable markup for one token into a single
            # dotted IPA pronunciation and wrap it in a dictionary word.
            # NOTE(review): no parser is named, so bs4 picks whichever
            # parser it finds installed -- confirm this is intended.
            soup = bs4.BeautifulSoup(openmary(token))
            sylls = []
            for syll in soup.find_all('syllable'):
                # a syllable with a truthy 'stress' attribute gets a leading "'"
                stress = "'" if syll.get('stress', None) else ""
                sylls.append(stress + ''.join(
                    sampa2ipa(ph) for ph in syll['ph'].split()))

            from Phoneme import Phoneme
            # If no character of the first syllable tests as a vowel,
            # glue that syllable onto the second one.
            if len(sylls) > 1 and True not in [
                    Phoneme(phon).isVowel() for phon in sylls[0]
            ]:
                sylls = [sylls[0] + sylls[1]] + sylls[2:]

            made = [self.dict.make(('.'.join(sylls), []), token)]
            for w in made:
                w.origin = 'openmary'
            return made

        stanza = self.newchild()
        line = stanza.newchild()

        for stanzatext in text.split('\n\n'):
            stanzatext = stanzatext.strip()
            if not stanzatext: continue

            for linetext in stanzatext.split('\n'):
                linetext = linetext.strip()
                if not linetext: continue

                for token in linetext.split():
                    p0, word, p1 = gleanPunc2(token)
                    if p0 and not line.empty(): line.finish()
                    if not word: continue

                    # Replace finished containers only once a real word arrives.
                    if stanza.finished: stanza = self.newchild()
                    if line.finished: line = stanza.newchild()

                    if self.dict.has(word):
                        words = self.dict.get(word)
                        for w in words:
                            w.origin = 'cmu'
                    elif self.lang == 'en':
                        words = words_from_mary(word)
                    else:
                        words = self.dict.get(word)

                    line.newchild(words)
                    if self.phrasebreak != 'line' and p1 and not line.empty():
                        line.finish()

                if not line.empty(): line.finish()
            # Kept as in the original call sequence: the line is offered
            # for finishing once more before the stanza is closed.
            if not line.empty(): line.finish()
            if not stanza.empty(): stanza.finish()