Beispiel #1
0
def translaterev(text, language):
    """
    translaterev(text, language) -> (str, int, float)
    Translate text phrase by phrase, working from the end towards the start.

    The text is split into words by PhraseString. On every pass the words
    not yet translated (always a prefix of the word list) are considered:
    the longest run of words ending at the right edge of that prefix is
    looked up first, then ever shorter runs obtained by dropping words from
    the left. The first run with a dictionary entry is used; if even the
    single last word has no entry it is passed to mapTranslate. Each
    translated piece is prepended to the output.

        text       : text to translate
        language   : target language

    Returns a tuple (normalized translated text, number of source words,
    score). The score is always 0.0: this function has no n-gram log to
    read a score from.
    """
    pstring = PhraseString(text)
    Parts = pstring.words
    remaining = len(Parts)
    words = remaining
    # `score` was returned without ever being assigned, which raises
    # NameError at the return statement; define it explicitly.
    score = 0.0
    outxt = ''
    while remaining >= 1:
        parts = Parts[0:remaining]
        cc = len(parts)
        # ss[i] is the text of words i..cc-1 with their breakers, so ss[0]
        # is the longest candidate and ss[cc-1] is the last word alone.
        ss = [
            ''.join(st.word + st.breaker for st in parts[i:cc])
            for i in range(cc)
        ]
        # Every candidate ends with the breaker of the last word.
        sep = parts[cc - 1].breaker
        sep_len = len(sep)

        tr = ''
        for i, s in enumerate(ss):
            # Strip the trailing breaker before the dictionary lookup.
            phrase = s[:-sep_len] if sep_len > 0 else s
            tr, _ = getTranslation(phrase)  # from dictionary
            if tr != '':
                # Trace output: number of words matched and the translation.
                print(cc - i, tr.encode('utf-8'))
                tr = tr + sep
                # cc - i words were consumed; one of them is accounted for
                # by the unconditional decrement after the for loop.
                remaining -= cc - i - 1
                break
            elif i == cc - 1:
                # No dictionary entry even for the single last word. Note
                # that `s` (breaker included) is passed, so no separator is
                # appended afterwards.
                tr = mapTranslate(s, language)
        remaining -= 1
        outxt = tr + outxt
    final = ''.join([pstring.prefix, outxt, pstring.suffix])
    return MTLangUtils.normalizeLangText(final, language), words, score
Beispiel #2
0
def translatenew(text,
                 language,
                 logforngrams=None,
                 logsentencengrams=None,
                 tagmode=False):
    """
    translatenew(text, language, ...) -> (str, int, float)
    Translate text using state handler functions that append to the
    module-level string `outxt`.

    Each call (re)defines three handlers as module globals:
    Start_transitions, SecondHai_transitions and End_transitions. Each
    handler takes the text still to be processed and, on most paths,
    returns a (next state name, remaining text) tuple. The handlers are not
    called directly in this function; `declare(text)` is invoked instead.
    NOTE(review): `declare` is defined elsewhere; presumably it runs the
    handlers until an end state is reached - confirm.

        text              : text to translate
        language          : target language (passed to mapTranslate and to
                            MTLangUtils.normalizeLangText)
        logforngrams      : passed through to logthisnGram
        logsentencengrams : passed through to logthisnGram; when not None
                            its 'Score' entry is returned as the score
        tagmode           : when true, each dictionary translation is passed
                            through gettagstr before being appended

    Returns a tuple (normalized translated text, number of source words,
    score). The score is 0.0 when logsentencengrams is None.

    Side effects: increments the module global `count` (it must already
    exist), resets the module global `outxt`, rebinds the three handler
    globals, and prints trace output.
    """
    # `prstring` is declared global but never used in this function.
    global outxt, prstring, count, Start_transitions
    pstring = PhraseString(text)
    Parts = pstring.words
    partsLength = len(Parts)
    leng = partsLength  # word count reported back to the caller
    outxt = ''
    count = count + 1
    # Words that get special handling in Start_transitions.
    postposition = ['में', 'पर', 'की', 'के', 'का']

    #print(count)

    # Handler for the 'Start' state: translate the longest leading phrase of
    # `text` that has a dictionary entry, append it to the global `outxt`
    # and return (next state, remaining text).
    # Every path after the for loop returns, so the while loop body runs at
    # most once. If `text` has no words the function falls off the end and
    # returns None.
    def Start_transitions(text):
        print('Start_transitions')
        print(text)
        pstring = PhraseString(text)
        Parts = pstring.words
        partsLength = len(Parts)
        global outxt
        #print ("WORDS")
        #print (partsLength)
        #words=partsLength
        loop = 0
        tags = {}
        while (loop < partsLength):
            parts = Parts[loop:partsLength]
            cc = len(parts)
            #if cc>1:

            # ss[i] is the text of the first cc-i words with their breakers:
            # ss[0] is the longest candidate, ss[cc-1] the first word alone.
            ss = [
                ''.join([st.word + st.breaker for st in parts[0:cc - i]])
                for i in range(0, cc)
            ]

            tr = ''
            for i, s in enumerate(ss):
                # Remove the last separator before the lookup
                sep = parts[cc - i - 1].breaker
                l = len(sep)
                if l > 0:
                    phrase = s[:-l]
                else:
                    phrase = s
                print(phrase)
                # Look the phrase up; `state` is the third value returned by
                # getTranslation.
                tr, _, state = getTranslation(phrase)  #from dictionary
                if (phrase not in postposition):
                    # NOTE(review): repeats the lookup made just above with
                    # the same argument - looks redundant; confirm that
                    # getTranslation has no side effects before removing.
                    tr, _, state = getTranslation(phrase)
                    feature = getWordEndFeatures(tr)
                # NOTE(review): in the two branches below `feature` is the
                # value left by an earlier (longer) candidate in this same
                # for loop. If the very first candidate is itself one of
                # these words, `feature` is unbound and the comparison /
                # lookup raises UnboundLocalError.
                if (phrase == 'में' or phrase == 'पर'):
                    # Map the feature to 1 or 0 and look up the string
                    # '1' or '0' in the dictionary.
                    if (feature == 'er'):
                        feature = 1
                    else:
                        feature = 0
                    tr, _, state = getTranslation(str(feature))
                    print(feature)
                if (phrase == 'की' or phrase == 'के' or phrase == 'का'):
                    # Look up the feature value itself in the dictionary.
                    tr, _, state = getTranslation(feature)
                    print(feature)
                    #print(tr)

                if (tr != ''):
                    if tagmode:
                        tr = gettagstr(tags, tr)
                    tr = tr + sep
                    # cc - i words were consumed; one more is added by the
                    # unconditional increment after the for loop.
                    loop += (cc - i - 1)
                    #print ("SIZE")
                    #if (cc<=srcwords):
                    #    print (cc-srcwords)
                    logthisnGram(phrase, tr, cc - i, logforngrams,
                                 logsentencengrams)
                    break
                elif (i == len(ss) - 1):
                    """print ("SIZE*")
                    print (phrase)
                    print (cc-i)
                    """
                    # No dictionary entry even for the single first word:
                    # fall back to mapTranslate and log with size -1.
                    tr = mapTranslate(phrase, language)
                    #if not (phrase==tr):
                    #tr=tr+'*'
                    #print (tr.encode('utf-8'))
                    logthisnGram(phrase, tr, -1, logforngrams,
                                 logsentencengrams)
                    tr = tr + sep
            loop += 1
            outxt += tr
            print(outxt)
            # `i` is the index at which the for loop stopped, i.e. how many
            # trailing words were NOT part of the translated phrase.
            if (i == 0):
                # The whole text was handled in one piece: go to 'End'.
                # Parts[0:] is the full list, so the returned text is the
                # complete input text again.
                newState = 'End'
                Parts = Parts[i:]
                partsLength = len(Parts)
                text = ''.join(
                    [st.word + st.breaker for st in Parts[0:partsLength]])
                return (newState, text)
            if (state == 'SHai'
                ):  # move to 'SecondHai' when the lookup reported state 'SHai'
                newState = 'SecondHai'
                # Keep only the i words that were not consumed.
                Parts = Parts[(partsLength - i):]
                partsLength = len(Parts)
                text = ''.join(
                    [st.word + st.breaker for st in Parts[0:partsLength]])
                return (newState, text)
            else:
                newState = 'Start'  # otherwise stay in the 'Start' state
                # Keep only the i words that were not consumed.
                Parts = Parts[(partsLength - i):]
                partsLength = len(Parts)
                #print('going')
                #print(partsLength)
                if (partsLength == 1):
                    # NOTE(review): with exactly one word left this returns
                    # 'End' and an empty text, so that word is not passed
                    # on for translation - confirm this is intended.
                    newState = 'End'
                    return (newState, '')
                text = ''.join(
                    [st.word + st.breaker for st in Parts[0:partsLength]])
                return (newState, text)

    global End_transitions

    # Handler for the 'End' state: ignores its argument and returns a fixed
    # tuple.
    def End_transitions(txt):  #end of state table
        return ('End of the state', ' ')

    global SecondHai_transitions

    # Handler for the 'SecondHai' state: same longest-phrase-first lookup as
    # Start_transitions, except that a candidate equal to 'है' is replaced
    # by a fixed string and ends processing. Unlike Start_transitions the
    # while loop here really iterates over all words; if 'है' is never a
    # candidate the function falls off the end and returns None.
    def SecondHai_transitions(text):
        print(text)
        pstring = PhraseString(text)
        Parts = pstring.words
        partsLength = len(Parts)
        global outxt
        #print ("WORDS")
        #print (partsLength)
        #words=partsLength
        loop = 0
        tags = {}
        while (loop < partsLength):
            parts = Parts[loop:partsLength]
            cc = len(parts)
            #if cc>1:

            # ss[i] is the text of the first cc-i words with their breakers.
            ss = [
                ''.join([st.word + st.breaker for st in parts[0:cc - i]])
                for i in range(0, cc)
            ]

            tr = ''
            for i, s in enumerate(ss):
                # Remove the last separator before the lookup
                sep = parts[cc - i - 1].breaker
                l = len(sep)
                if l > 0:
                    phrase = s[:-l]
                else:
                    phrase = s
                # Special case: the candidate 'है' gets a fixed translation
                # (appended without its separator) and the handler returns
                # immediately with next state 'End'.
                if (phrase == 'है'):
                    tr = 'আছে'
                    outxt += tr
                    newState = 'End'
                    Parts = Parts[i:]
                    partsLength = len(Parts)
                    text = ''.join(
                        [st.word + st.breaker for st in Parts[0:partsLength]])
                    return (newState, text)
                tr, _, state = getTranslation(phrase)  #from dictionary
                if (tr != ''):
                    if tagmode:
                        tr = gettagstr(tags, tr)
                    tr = tr + sep
                    # cc - i words were consumed; one more is added by the
                    # unconditional increment after the for loop.
                    loop += (cc - i - 1)
                    #print ("SIZE")
                    #if (cc<=srcwords):
                    #    print (cc-srcwords)
                    logthisnGram(phrase, tr, cc - i, logforngrams,
                                 logsentencengrams)
                    break
                elif (i == len(ss) - 1):
                    """print ("SIZE*")
                    print (phrase)
                    print (cc-i)
                    """
                    # No dictionary entry even for the single first word:
                    # fall back to mapTranslate and log with size -1.
                    tr = mapTranslate(phrase, language)
                    #if not (phrase==tr):
                    #tr=tr+'*'
                    #print (tr.encode('utf-8'))
                    logthisnGram(phrase, tr, -1, logforngrams,
                                 logsentencengrams)
                    tr = tr + sep
            loop += 1
            outxt += tr

    # NOTE(review): `declare` is not defined in this function; presumably it
    # drives the handlers above over `text` - verify against its definition.
    declare(text)
    # `outxt` is the module global the handlers appended to.
    final = ''.join([pstring.prefix, outxt, pstring.suffix])
    #final = outxt
    print(final)
    print(count)
    score = 0.0
    if logsentencengrams is not None:
        score = logsentencengrams['Score']
    return MTLangUtils.normalizeLangText(final, language), leng, score
Beispiel #3
0
def translatenewcontext(text,
                        language,
                        logforngrams=None,
                        logsentencengrams=None):
    """
    translatenewcontext(text, language, ...) -> (str, int, float)
    Translate text by splitting it into words and, from each position,
    looking up the longest phrase first with getTranslationWithContext.

    For the current position every phrase from "all remaining words" down
    to "the first remaining word" is tried. The lookup also returns how many
    source words the translation stands for; when that is fewer than the
    words in the phrase, only that many words are consumed. A single word
    with no translation is passed to mapTranslate.

        text              : text to translate
        language          : target language
        logforngrams      : passed through to logthisnGram
        logsentencengrams : passed through to logthisnGram; when not None
                            its 'Score' entry is returned as the score

    Returns a tuple (normalized translated text, number of source words,
    score). The score is 0.0 when logsentencengrams is None.
    """
    pstring = PhraseString(text)
    outxt = ''
    Parts = pstring.words
    partsLength = len(Parts)
    words = partsLength
    loop = 0
    while loop < partsLength:
        parts = Parts[loop:partsLength]
        cc = len(parts)

        # ss[i] is the text of the first cc-i words with their breakers:
        # ss[0] is the longest candidate, ss[cc-1] the first word alone.
        ss = [
            ''.join(st.word + st.breaker for st in parts[0:cc - i])
            for i in range(cc)
        ]

        tr = ''
        for i, s in enumerate(ss):
            # Breaker that follows the last word of this candidate.
            sep = parts[cc - i - 1].breaker
            srcwords = cc - i
            # Trace output: number of words in the candidate phrase.
            print(srcwords)
            sep_len = len(sep)
            # Remove the last separator before the lookup.
            phrase = s[:-sep_len] if sep_len > 0 else s
            tr, _, srcngramwords = getTranslationWithContext(
                phrase)  # from dictionary
            if tr != '':
                # The translation may cover fewer source words than the
                # phrase that was looked up. Only shrink for a positive
                # count: zero or a negative value would stop `loop` from
                # advancing and the while loop would never terminate.
                if 0 < srcngramwords < srcwords:
                    srcwords = srcngramwords
                    # The last consumed word is parts[srcwords - 1]; its
                    # breaker is the separator to emit. (The previous index
                    # cc - srcwords - 1 pointed at an unrelated word.)
                    sep = parts[srcwords - 1].breaker
                tr = tr + sep
                # srcwords words were consumed; one more is added by the
                # unconditional increment after the for loop.
                loop += (srcwords - 1)
                # NOTE(review): the size argument is cc - srcwords here;
                # confirm against logthisnGram whether the number of
                # matched words (srcwords) is what it expects.
                logthisnGram(phrase, tr, cc - srcwords, logforngrams,
                             logsentencengrams)
                break
            elif i == len(ss) - 1:
                # No translation even for the single first word: fall back
                # to mapTranslate and log with size -1.
                tr = mapTranslate(phrase, language)
                logthisnGram(phrase, tr, -1, logforngrams, logsentencengrams)
                tr = tr + sep
        loop += 1
        outxt += tr
    final = ''.join([pstring.prefix, outxt, pstring.suffix])
    score = 0.0
    if logsentencengrams is not None:
        score = logsentencengrams['Score']
    return MTLangUtils.normalizeLangText(final, language), words, score