def test_aligners(japanese, english):
    """
    Compare the two aligners pair by pair and report certain disagreements.

    For every index of ``english`` the matching entries of ``japanese`` and
    ``english`` are passed to ``aligner1.align_words`` and to
    ``aligner2.align_words`` (the latter with an extra ``False`` argument).
    A pair is printed only when the two results differ AND the second
    aligner returned the string "ERR".

    INPUT:
        japanese - indexable collection of japanese inputs, at least as long
                   as ``english``
        english  - sequence of english inputs; its length drives the loop
    """
    for idx, eng_item in enumerate(english):
        jap_item = japanese[idx]
        basic_result = aligner1.align_words(jap_item, eng_item)
        ted_result = aligner2.align_words(jap_item, eng_item, False)

        # Only disagreements in which the second aligner gave up are shown.
        if basic_result != ted_result and ted_result == "ERR":
            print("Basic: " + str(basic_result))
            print("Ted  : " + str(ted_result))
            print("-")
# Example no. 2
# 0
def align_phoneme_sets(japanese,english,verbose=False):
    """
    Align paired japanese/english phoneme sequences using ``align_ted``.

    INPUT:
        japanese - an array of japanese phoneme sequences. each item is a string
        english  - an array of english phoneme sequences. each item is a string
        verbose  - when true, print the exception type for every pair whose
                   alignment raised an exception

    OUTPUT:
        j_aligned - an array of japanese phoneme sequences. each item is an ARRAY
        e_aligned - an array of english phoneme sequences. each item is an ARRAY

    SIDE EFFECTS:
        Overwrites "error_align.txt" in the current working directory. Every
        pair for which the aligner returns "ERR" is recorded there as three
        lines: the pair index, the english sequence and the japanese sequence.
        Progress and a final success/error summary are printed to stdout.

    A pair counts as an error when the aligner returns "ERR" or when
    processing it raises an exception (including a missing ``english``
    entry). Failed pairs are left out of both returned lists, so the two
    lists always have the same length.
    """
    import align_ted as aligner
    print("[Start] Aligning Phoneme Sequences")
    print("[.....] Starting with %i sequences" % len(japanese))

    aligned_japanese = []
    aligned_english = []
    errors = 0

    # The context manager closes the log even if something unexpected
    # escapes the loop.
    with open("error_align.txt", 'w') as err_align:
        for i, jap_phones in enumerate(japanese):
            try:
                eng_phones = english[i]
                aligned = aligner.align_words(jap_phones, eng_phones, False)
                if aligned == "ERR":
                    # Log the complete sequences under their own labels,
                    # one record per line.
                    err_align.write("line No:%d\n" % i)
                    err_align.write("Eng:%s\n" % (eng_phones,))
                    err_align.write("Jap:%s\n" % (jap_phones,))
                    errors += 1
                else:
                    # Read both halves before appending so a malformed
                    # result cannot leave the two lists out of step.
                    jap_aligned, eng_aligned = aligned[0], aligned[1]
                    aligned_japanese.append(jap_aligned)
                    aligned_english.append(eng_aligned)
            except Exception as exc:
                # Best effort: one bad pair must not abort the whole run.
                if verbose:
                    print("Unexpected error line %i: %s" % (i, type(exc)))
                errors += 1

    # Rate is measured against the number of attempted pairs; guard the
    # empty-input case instead of dividing by zero.
    attempted = len(japanese)
    error_rate = float(errors) / attempted if attempted else 0.0
    print("[.....] Success: %i, Error: %i, Error Rate: %f" % (len(aligned_japanese), errors, error_rate))
    print("[ End ] Aligning Phoneme Sequences")
    return aligned_japanese, aligned_english