def translate(sentence, translationMatrix, englishDict, frenchDict, uni, bi, tri, slopeParam, sigmaParam):
    """Decode a French sentence into English via a per-position DP over candidates.

    Args:
        sentence: French source sentence, whitespace-separated words.
        translationMatrix: t[e][f] translation probabilities, indexed
            [english word index][french word index].
        englishDict: English word -> index mapping.
        frenchDict: French word -> index mapping.
        uni, bi, tri: language-model tables forwarded to lang_model_scores
            (tri is accepted for interface compatibility but unused here).
        slopeParam, sigmaParam: mean-slope and std-dev of the normal
            distribution used to sample the English length from the
            French length.

    Returns:
        dict with keys "translation" (decoded English words separated by
        single spaces, with a trailing space, matching the original
        output format) and "probability" (the score of that path).

    NOTE(review): relies on module-level names `alignmentMatrix`, `hp`,
    `nrand`, `np`, `pickle` and `lang_model_scores` defined elsewhere in
    this file.
    """
    frSent = sentence.split()
    # Sample the target-sentence length; clamp to >= 1 so a short or
    # negative draw still yields a non-empty translation (the original
    # silently returned an empty sentence with probability 1).
    enLen = max(1, int(nrand.normal(len(frSent) / slopeParam, sigmaParam)))

    # Inverse English dictionary: index -> word.
    inv_eng = {index: word for word, index in englishDict.items()}

    # For every French word index, shortlist the `top_k` English indices
    # with the highest translation probability (candidates for the DP).
    top_k = 20  # original comment said "top 10" but the code kept 20
    index_list = []
    for fi in range(len(translationMatrix[0])):
        column = [translationMatrix[ei][fi] for ei in range(len(translationMatrix))]
        ranked = sorted(range(len(column)), key=lambda e: column[e], reverse=True)
        index_list.append(ranked[:top_k])
    # Persist the shortlist for later runs.  BUG FIX: pickle data must be
    # written and read in *binary* mode -- the original wrote with "wb"
    # but immediately re-read with mode "r", which fails under Python 3.
    # We keep the file side effect (now via a context manager, so the
    # handle is closed) and simply use the in-memory list directly.
    with open("../top20_index_100_dp.list", "wb") as fh:
        pickle.dump(index_list, fh)
    top_index = index_list

    ## Alignment model: slice the alignment table for this (m, l) pair.
    mTemp = len(frSent)
    lTemp = enLen
    alignmentMatrix_curr = alignmentMatrix[hp.alignmentMapping(mTemp, lTemp)][0:lTemp, 0:mTemp]

    # dp_mat[i][j] = (word, score) -- best candidate at English position i
    # when the previous position carried DP column j.
    dp_mat = []
    for i in range(enLen):
        # Most likely French position aligned to English position i.
        ai = np.argmax(alignmentMatrix_curr[i, 0:mTemp])
        currFrInd = frenchDict[frSent[ai]]
        template_entry = []
        if i == 0:
            # First position: score every English word as a unigram.
            for j in range(len(translationMatrix)):
                template_entry.append(
                    (inv_eng[j], translationMatrix[j][currFrInd] * lang_model_scores(inv_eng[j], "", uni, bi, 1))
                )
        else:
            # Later positions: pick the best shortlist candidate given the
            # previous word in DP column j (bigram score).
            for j in range(len(translationMatrix)):
                maxval = 0
                # BUG FIX: initialize maxword -- it was previously unbound
                # (NameError) whenever every candidate scored 0.
                maxword = ""
                for k in range(len(top_index[0])):
                    currEngInd = top_index[currFrInd][k]
                    probtemp = translationMatrix[currEngInd][currFrInd] * lang_model_scores(
                        inv_eng[currEngInd], dp_mat[i - 1][j][0], uni, bi, 2
                    )
                    if maxval < probtemp:
                        maxval = probtemp
                        maxword = inv_eng[currEngInd]
                template_entry.append((maxword, maxval))
        dp_mat.append(template_entry)

    # Select the highest-probability column; `<=` keeps the last tie,
    # matching the original behavior.
    translated_sent = ""
    maxprob = 0
    for col in range(len(translationMatrix)):
        words = [dp_mat[pos][col][0] for pos in range(enLen)]
        tmp_prob = 1
        for pos in range(enLen):
            tmp_prob *= dp_mat[pos][col][1]
        if maxprob <= tmp_prob:
            maxprob = tmp_prob
            # join + trailing space reproduces the original "w1 w2 " format
            translated_sent = " ".join(words) + " "
    return {"translation": translated_sent, "probability": maxprob}
temp += count_matrix[eng_dict[englishWord]][french_dict[frenchWord]] lambda_norm[eng_dict[englishWord]] = temp for frenchWord in french_dict: translation_matrix[eng_dict[englishWord]][french_dict[frenchWord]] = (count_matrix[eng_dict[englishWord]][french_dict[frenchWord]])/temp # Computing the mu_matrix mu_matrix = hp.createMuMatrix() for s in range(len(eng_sent)): m_temp = len(french_sent[s]) l_temp = len(eng_sent[s]) t_temp = np.array([[translation_matrix[eng_dict[wi]][french_dict[wj]] for wj in french_sent[s]] for wi in eng_sent[s]]) a_temp = alignment_matrix[hp.alignmentMapping(m_temp, l_temp)][0:l_temp,0:m_temp] denom_temp = np.reshape(t_temp.transpose().dot(a_temp).diagonal(), (1,m_temp)).repeat(l_temp, 0) c_ijml = (1.0*a_temp*t_temp)/denom_temp mu_matrix[hp.alignmentMapping(m_temp, l_temp)][0:m_temp] += np.sum(c_ijml, 0) ''' for j in range(m_temp): mu_temp = 0 for i in range(l_temp): mu_temp += hp.c_of_i_given_jmlFE(i, j, m_temp, l_temp, eng_sent[s], french_sent[s], translation_matrix, alignment_matrix, eng_dict, french_dict) mu_matrix[hp.alignmentMapping(m_temp, l_temp)][j] += mu_temp ''' and 0 # Updating alignment_matrix