def translaterev(text, language):
    """Translate ``text`` by greedy longest-match lookup, working right to left.

    The text is split into words by ``PhraseString``.  For the current right
    edge, every run of words ending at that edge is tried, longest first,
    against the dictionary (``getTranslation``).  The first run with a
    non-empty translation is emitted as one unit; if none matches, the last
    word (with its trailing separator) is handed to ``mapTranslate``.  The
    right edge then moves left past the consumed words until nothing remains.

    text     : text to translate
    language : target language

    Returns a tuple ``(translated_text, word_count, score)`` where
    ``translated_text`` has been passed through
    ``MTLangUtils.normalizeLangText``, ``word_count`` is the number of words
    in the input, and ``score`` is always ``0.0`` (this variant takes no
    n-gram log to read a score from).
    """
    pstring = PhraseString(text)
    all_words = pstring.words
    word_count = len(all_words)
    remaining = word_count
    pieces = []  # translated chunks, collected from the end of the text

    while remaining >= 1:
        window = all_words[:remaining]
        size = len(window)
        # Separator that follows the right-most word of every candidate run.
        sep = window[-1].breaker

        for start in range(size):
            candidate = ''.join(w.word + w.breaker for w in window[start:])
            # Look the phrase up without its trailing separator.
            phrase = candidate[:-len(sep)] if sep else candidate
            # Only the translation is needed; indexing (rather than tuple
            # unpacking) keeps this working whether getTranslation returns
            # two or three values, as callers in this file expect both.
            found = getTranslation(phrase)[0]
            if found != '':
                pieces.append(found + sep)
                remaining -= size - start
                break
        else:
            # No dictionary entry for any run: fall back on the last word
            # alone (separator included, as mapTranslate receives it).
            pieces.append(mapTranslate(candidate, language))
            remaining -= 1

    # Chunks were gathered right-to-left, so restore reading order.
    final = ''.join([pstring.prefix, ''.join(reversed(pieces)), pstring.suffix])
    # The original returned an undefined name ``score`` here (NameError).
    score = 0.0
    return MTLangUtils.normalizeLangText(final, language), word_count, score
def translatenew(text,
                 language,
                 logforngrams=None,
                 logsentencengrams=None,
                 tagmode=False):
    """Translate ``text`` through the Start / SecondHai / End handler functions.

    The function resets the module-level ``outxt`` accumulator, increments
    the module-level ``count``, publishes three handler functions as module
    globals (``Start_transitions``, ``End_transitions``,
    ``SecondHai_transitions``) and then calls ``declare(text)``.  ``declare``
    is not defined here; presumably it runs the handlers as a state machine
    keyed by the state names they return -- TODO confirm.  The handlers
    append their output to the global ``outxt``.

    text              : text to translate
    language          : target language (passed to ``mapTranslate`` and
                        ``MTLangUtils.normalizeLangText``)
    logforngrams      : forwarded unchanged to ``logthisnGram``
    logsentencengrams : forwarded to ``logthisnGram``; when not ``None`` its
                        ``'Score'`` entry becomes the returned score
    tagmode           : when true, dictionary hits are passed through
                        ``gettagstr`` before being emitted

    Returns ``(normalized_text, word_count, score)``; ``score`` is ``0.0``
    when ``logsentencengrams`` is ``None``.

    NOTE(review): this block was recovered from whitespace-collapsed source,
    so the nesting of some statements (marked below) is a reconstruction and
    should be checked against the author's copy.
    """
    # ``prstring`` is declared global but never used in this function.
    global outxt, prstring, count, Start_transitions
    pstring = PhraseString(text)
    Parts = pstring.words
    partsLength = len(Parts)
    leng = partsLength  # word count of the whole input, returned to the caller
    outxt = ''
    count = count + 1
    # Phrases that receive special handling in Start_transitions.
    postposition = ['में', 'पर', 'की', 'के', 'का']

    def Start_transitions(text):
        """Handle the 'Start' state: translate the leading phrase of ``text``.

        Tries prefixes of ``text`` from longest to shortest, appends the
        chosen translation to the global ``outxt`` and returns
        ``(newState, remaining_text)``.
        """
        print('Start_transitions')
        print(text)
        pstring = PhraseString(text)
        Parts = pstring.words
        partsLength = len(Parts)
        global outxt
        loop = 0
        tags = {}
        # NOTE(review): every path at the bottom of this loop body returns,
        # so as reconstructed the body runs at most once per call; an empty
        # ``text`` skips the loop and the function returns None.
        while (loop < partsLength):
            parts = Parts[loop:partsLength]
            cc = len(parts)
            # ss[i] is the first (cc - i) words joined with their separators,
            # i.e. candidate prefixes from longest to shortest.
            ss = [
                ''.join([st.word + st.breaker for st in parts[0:cc - i]])
                for i in range(0, cc)
            ]
            tr = ''
            for i, s in enumerate(ss):
                # Remove the last separator before the dictionary lookup.
                sep = parts[cc - i - 1].breaker
                l = len(sep)
                if l > 0:
                    phrase = s[:-l]
                else:
                    phrase = s
                print(phrase)
                # Translate the phrase: get translation, attribute and state.
                tr, _, state = getTranslation(phrase)  # from dictionary
                if (phrase not in postposition):
                    # Same lookup as the line above, repeated.
                    tr, _, state = getTranslation(phrase)
                    feature = getWordEndFeatures(tr)
                # NOTE(review): ``feature`` is only assigned in the branch
                # above, so a postposition examined first in this loop reads
                # it before assignment -- confirm intended carry-over.
                if (phrase == 'में' or phrase == 'पर'):
                    if (feature == 'er'):
                        feature = 1
                    else:
                        feature = 0
                    # The 0/1 flag, as a string, is itself looked up.
                    tr, _, state = getTranslation(str(feature))
                    print(feature)
                if (phrase == 'की' or phrase == 'के' or phrase == 'का'):
                    tr, _, state = getTranslation(feature)
                    print(feature)
                if (tr != ''):
                    if tagmode:
                        tr = gettagstr(tags, tr)
                    tr = tr + sep
                    # Together with the ``loop += 1`` after the for loop this
                    # advances past all (cc - i) matched words.
                    loop += (cc - i - 1)
                    logthisnGram(phrase, tr, cc - i, logforngrams,
                                 logsentencengrams)
                    break
                elif (i == len(ss) - 1):
                    # The bare string below is a disabled debug block left in
                    # place as a no-op expression statement.
                    """print ("SIZE*") print (phrase) print (cc-i) """
                    # No prefix matched: fall back on mapTranslate for the
                    # shortest (single-word) phrase, logged with size -1.
                    tr = mapTranslate(phrase, language)
                    logthisnGram(phrase, tr, -1, logforngrams,
                                 logsentencengrams)
                    tr = tr + sep
            loop += 1
            outxt += tr
            print(outxt)
            if (i == 0):
                # The longest candidate (the whole text) was used.
                newState = 'End'
                Parts = Parts[i:]
                partsLength = len(Parts)
                text = ''.join(
                    [st.word + st.breaker for st in Parts[0:partsLength]])
                return (newState, text)
            if (state == 'SHai'
                ):  # move to the next state if second form of hai can come
                newState = 'SecondHai'
                # (partsLength - i) words were consumed; keep the rest.
                Parts = Parts[(partsLength - i):]
                partsLength = len(Parts)
                text = ''.join(
                    [st.word + st.breaker for st in Parts[0:partsLength]])
                return (newState, text)
            else:
                newState = 'Start'  # else move to the original start function
                Parts = Parts[(partsLength - i):]
                partsLength = len(Parts)
                # NOTE(review): when exactly one word remains it is returned
                # as '' with state 'End', so that word is not passed to any
                # handler -- confirm this is intended.
                if (partsLength == 1):
                    newState = 'End'
                    return (newState, '')
                text = ''.join(
                    [st.word + st.breaker for st in Parts[0:partsLength]])
                return (newState, text)

    global End_transitions

    def End_transitions(txt):  # end of state table
        """Handler for the 'End' state; ignores ``txt`` and returns a fixed pair."""
        return ('End of the state', ' ')

    global SecondHai_transitions

    def SecondHai_transitions(text):
        """Handle the 'SecondHai' state.

        Scans ``text`` with the same longest-prefix loop as the Start
        handler.  A candidate phrase equal to 'है' is emitted as a hard-coded
        translation and ends the machine; any other phrase is translated via
        the dictionary or ``mapTranslate`` and appended to ``outxt``.
        """
        print(text)
        pstring = PhraseString(text)
        Parts = pstring.words
        partsLength = len(Parts)
        global outxt
        loop = 0
        tags = {}
        while (loop < partsLength):
            parts = Parts[loop:partsLength]
            cc = len(parts)
            # Candidate prefixes, longest first (see Start_transitions).
            ss = [
                ''.join([st.word + st.breaker for st in parts[0:cc - i]])
                for i in range(0, cc)
            ]
            tr = ''
            for i, s in enumerate(ss):
                # Remove the last separator before the lookup.
                sep = parts[cc - i - 1].breaker
                l = len(sep)
                if l > 0:
                    phrase = s[:-l]
                else:
                    phrase = s
                # Special case checked before the dictionary lookup.
                if (phrase == 'है'):
                    tr = 'আছে'
                    outxt += tr
                    newState = 'End'
                    Parts = Parts[i:]
                    partsLength = len(Parts)
                    text = ''.join(
                        [st.word + st.breaker for st in Parts[0:partsLength]])
                    return (newState, text)
                tr, _, state = getTranslation(phrase)  # from dictionary
                if (tr != ''):
                    if tagmode:
                        tr = gettagstr(tags, tr)
                    tr = tr + sep
                    loop += (cc - i - 1)
                    logthisnGram(phrase, tr, cc - i, logforngrams,
                                 logsentencengrams)
                    break
                elif (i == len(ss) - 1):
                    # Disabled debug block kept as a no-op string expression.
                    """print ("SIZE*") print (phrase) print (cc-i) """
                    tr = mapTranslate(phrase, language)
                    logthisnGram(phrase, tr, -1, logforngrams,
                                 logsentencengrams)
                    tr = tr + sep
            loop += 1
            outxt += tr
        # NOTE(review): if 'है' is never matched the loop ends and this
        # handler returns None rather than a (state, text) pair -- confirm
        # how declare() copes with that.

    # Run the handlers; they fill the global ``outxt`` as a side effect.
    declare(text)
    final = ''.join([pstring.prefix, outxt, pstring.suffix])
    print(final)
    print(count)
    score = 0.0
    if logsentencengrams is not None:
        score = logsentencengrams['Score']
    return MTLangUtils.normalizeLangText(final, language), leng, score
def translatenewcontext(text,
                        language,
                        logforngrams=None,
                        logsentencengrams=None):
    """Translate ``text`` by greedy longest-match lookup with context.

    The text is split into words by ``PhraseString``.  From the current
    position, prefixes of the remaining words are tried longest first against
    ``getTranslationWithContext``, which returns the translation together
    with the number of source words it accounts for.  The first non-empty
    translation is emitted; if that count is smaller than the phrase that
    was looked up, only that many words are consumed.  When no prefix
    matches, the single next word is passed to ``mapTranslate``.

    text              : text to translate
    language          : target language
    logforngrams      : forwarded unchanged to ``logthisnGram``
    logsentencengrams : forwarded to ``logthisnGram``; when not ``None`` its
                        ``'Score'`` entry becomes the returned score

    Returns ``(normalized_text, word_count, score)``; ``score`` is ``0.0``
    when ``logsentencengrams`` is ``None``.
    """
    pstring = PhraseString(text)
    source_words = pstring.words
    word_count = len(source_words)
    pieces = []  # translated chunks in reading order
    pos = 0

    while pos < word_count:
        window = source_words[pos:]
        consumed = 1  # the fallback path always consumes exactly one word

        for span in range(len(window), 0, -1):
            # Separator following the last word of this candidate prefix.
            sep = window[span - 1].breaker
            joined = ''.join(w.word + w.breaker for w in window[:span])
            # Look the phrase up without its trailing separator.
            phrase = joined[:-len(sep)] if sep else joined

            tr, _, srcngramwords = getTranslationWithContext(phrase)
            if tr != '':
                matched = span
                if matched > srcngramwords:
                    # The translation covers fewer words than were looked
                    # up.  Always consume at least one word so the outer
                    # loop is guaranteed to advance.
                    matched = max(1, srcngramwords)
                    # Use the separator of the last word actually consumed
                    # (the original indexed parts[cc - srcwords - 1], which
                    # counts from the wrong end of the window).
                    sep = window[matched - 1].breaker
                tr = tr + sep
                # Log the consumed word count, matching what translatenew
                # reports for a dictionary hit.
                logthisnGram(phrase, tr, matched, logforngrams,
                             logsentencengrams)
                pieces.append(tr)
                consumed = matched
                break
        else:
            # Nothing matched; ``phrase``/``sep`` now describe the single
            # next word.  Unmatched words are logged with size -1.
            tr = mapTranslate(phrase, language)
            logthisnGram(phrase, tr, -1, logforngrams, logsentencengrams)
            pieces.append(tr + sep)

        pos += consumed

    final = ''.join([pstring.prefix, ''.join(pieces), pstring.suffix])
    score = 0.0
    if logsentencengrams is not None:
        score = logsentencengrams['Score']
    return MTLangUtils.normalizeLangText(final, language), word_count, score