srclist.append(line) src = u"\n".join(srclist) if duplicateToBeDetermined: mode1 = sourceLang + "_lex-" + targetLang + "_step1" mode2 = sourceLang + "_lex-" + targetLang + "_step2" if alignments: mode1 += "_debug" mode2 += "_debug" if emptymode: mode1 += "_empty" mode2 += "_empty" localTranslation = apertium.translate(src, 'none', mode1) if DEBUG: print >> sys.stderr, "step1 translation: " print >> sys.stderr, localTranslation.encode("utf-8") startDebugTransfer = u"[\[debug-transfer" #replace lines with <ND> or <GD> patterns = [u"<ND>", u"<GD>", u"<PD>"] replacements = [[u"<sg>", u"<pl>"], [u"<m>", u"<f>"], [u"<p1>", u"<p2>", u"<p3>"]] curline = -1 replaced = list() replacementsDone = list() for line in localTranslation.split("\n"): curline += 1 #saber si hay un solo chunk
while pos >= 0: matches+=1 pos=line.find("^",pos) if matches <= 2: srclist.append(line) src=u"\n".join(srclist) if duplicateToBeDetermined: mode1=sourceLang+"_lex-"+targetLang+"_step1" mode2=sourceLang+"_lex-"+targetLang+"_step2" if alignments: mode1+="_debug" mode2+="_debug" localTranslation=apertium.translate(src,'none',mode1) #replace lines with <ND> or <GD> patterns=[u"<ND>",u"<GD>"] replacements=[[u"<sg>",u"<pl>"],[u"<m>",u"<f>"]] curline=-1 replaced=list() replacementsDone=list() for line in localTranslation.split("\n"): curline+=1 patternFound=False for numpat in range(len(patterns)): pattern=patterns[numpat] replacement=replacements[numpat] pos=line.find(pattern) if pos > -1:
def test_en_spa(self):
    # 'eng' -> 'spa' translation of a single word, using the default
    # options of apertium.translate; the test pins the output to 'Gatos'.
    expected = 'Gatos'
    result = apertium.translate('eng', 'spa', 'cats')
    self.assertEqual(result, expected)
def test_kaz_tat_formatting(self):
    # 'kaz' -> 'tat' translation with the formatting keyword passed
    # explicitly as 'txt'; the expected output is the single word below.
    source_word, expected = 'мысық', 'мәче'
    result = apertium.translate('kaz', 'tat', source_word, formatting='txt')
    self.assertEqual(result, expected)
def test_kaz_tat(self):
    # 'kaz' -> 'tat' translation of one word with no extra keyword
    # arguments; the result is compared against the expected word.
    self.assertEqual(
        apertium.translate('kaz', 'tat', 'мысық'),
        'мәче',
    )
def test_en_spa_formatting(self):
    # 'eng' -> 'spa' translation with formatting='txt' passed explicitly;
    # the test pins the output to 'Gatos'.
    self.assertEqual(
        apertium.translate('eng', 'spa', 'cats', formatting='txt'),
        'Gatos',
    )
parts=line.split(u"|||") if len(parts) >= 2: lexforms.append(parts[0].strip()) #sa = SentenceAlignments() #sa.parse(parts[1].strip()) sa=u"|||".join(parts[1:]) prevAlignments.append(sa) else: lexforms.append(line) if targetlang == "pt": mode=srclang+"_lex_analyzed-"+srclang else: mode=srclang+"_lex_analyzed_with_"+targetlang+"-"+srclang supForm=apertium.translate(u"[sep_sentence]\n".join(lexforms).replace(u"^",u"[sep_word_vmsanchez_e][sep_word_vmsanchez_s]^"),"none",mode) supForms=splitter.split(supForm) if not len(supForms) == len(lexforms): print >> sys.stderr, "Error: length mismatch between surface form phrases ("+str(len(supForms))+") and lexical forms ("+str(len(lexforms))+")" print >> sys.stderr, supForm exit(1) if debugOn: mwlf=MultiwordLexicalForms() #print >>sys.stderr, "Loading target language multiword lexical forms" mwlf.load(mwFile) for i in range(len(supForms)): sup=supForms[i] lex=lexforms[i]