def extractPhrases(normalToSimple, simpleToNormal):
    """Extract phrase pairs from two alignment files into 'phrases.txt'.

    Both files are read in lockstep, three lines per sentence pair: the
    first line of each record is discarded and the following two lines are
    handed to ``findAlignment``. Reading stops as soon as the first file is
    exhausted. Every candidate span is passed to ``extract``; results other
    than the ``'NULL'`` sentinel are stored as ``"<phrase[0]>\\t<phrase[1]>"``.

    Args:
        normalToSimple: Path of the normal-to-simple alignment file.
        simpleToNormal: Path of the simple-to-normal alignment file.

    Returns:
        None. The collected lines are written, newline-separated, to
        'phrases.txt' in the current working directory.
    """
    data = []

    # Context managers guarantee both inputs are closed even when
    # findAlignment/extract raises part-way through the files.
    with open(normalToSimple, 'r') as fns, open(simpleToNormal, 'r') as fsn:
        count = 0
        while True:
            count += 1
            print(count)  # progress indicator, one number per record attempt

            # First line of the record is only used to detect end of file.
            if fns.readline() == "":
                break
            normalToSimple1 = fns.readline()
            normalToSimple2 = fns.readline()

            fsn.readline()  # matching first line of the other file, unused
            simpleToNormal1 = fsn.readline()
            simpleToNormal2 = fsn.readline()

            wordAlignment, normal, simple = findAlignment(
                normalToSimple1, normalToSimple2,
                simpleToNormal1, simpleToNormal2)
            lNormal = len(normal)
            lSimple = len(simple)

            # Enumerate every span [estart, eend] on the normal side and the
            # tightest span [fstart, fend] covering its aligned positions.
            phrases = []
            for estart in range(lNormal):
                for eend in range(estart, lNormal):
                    fstart = lSimple
                    fend = -1
                    for i in wordAlignment:
                        if estart <= i <= eend:
                            for j in wordAlignment[i]:
                                fstart = min(j, fstart)
                                fend = max(j, fend)
                    # NOTE(review): with `or`, a span passes when either side
                    # is within the limit; a span with no aligned positions
                    # has fend - fstart < 0 and therefore always passes.
                    # Presumably `extract` rejects those -- confirm.
                    if (eend - estart) <= 20 or (fend - fstart) <= 20:
                        phrases.append((estart, eend, fstart, fend))

            for estart, eend, fstart, fend in phrases:
                phrase = extract(fstart, fend, estart, eend,
                                 wordAlignment, normal, simple)
                if phrase != 'NULL':
                    data.append(phrase[0] + '\t' + phrase[1])

    with open('phrases.txt', 'w') as f:
        f.write('\n'.join(data))
def extractPhrases(englishToGerman, germanToEnglish):
    '''Extract phrase pairs from two alignment files into 'phrases.txt'.

    Both files are read in lockstep, three lines per sentence pair: the
    first line of each record is discarded and the following two lines are
    handed to ``findAlignment``. Reading stops as soon as the first file is
    exhausted. Every candidate span is passed to ``extract``; results other
    than the ``'NULL'`` sentinel are stored as ``"<phrase[0]>\\t<phrase[1]>"``.

    Args:
        englishToGerman: Path of the English-to-German alignment file.
        germanToEnglish: Path of the German-to-English alignment file.

    Returns:
        None. The collected lines are written, newline-separated, to
        'phrases.txt' in the current working directory.
    '''
    data = []

    # Context managers guarantee both inputs are closed even when
    # findAlignment/extract raises part-way through the files.
    with open(englishToGerman, 'r') as feg, open(germanToEnglish, 'r') as fge:
        count = 0
        while True:
            count += 1
            # Call form works on Python 2 and 3; the statement form
            # `print count` is a SyntaxError on Python 3.
            print(count)

            # First line of the record is only used to detect end of file.
            if feg.readline() == "":
                break
            englishToGerman1 = feg.readline()
            englishToGerman2 = feg.readline()

            fge.readline()  # matching first line of the other file, unused
            germanToEnglish1 = fge.readline()
            germanToEnglish2 = fge.readline()

            wordAlignment, english, german = findAlignment(
                englishToGerman1, englishToGerman2,
                germanToEnglish1, germanToEnglish2)
            lEnglish = len(english)
            lGerman = len(german)

            # Enumerate every English span [estart, eend] and the tightest
            # German span [fstart, fend] covering its aligned positions.
            phrases = []
            for estart in range(lEnglish):
                for eend in range(estart, lEnglish):
                    fstart = lGerman
                    fend = -1
                    for i in wordAlignment:
                        if estart <= i <= eend:
                            for j in wordAlignment[i]:
                                fstart = min(j, fstart)
                                fend = max(j, fend)
                    # NOTE(review): with `or`, a span passes when either side
                    # is within the limit; a span with no aligned positions
                    # has fend - fstart < 0 and therefore always passes.
                    # Presumably `extract` rejects those -- confirm.
                    if (eend - estart) <= 20 or (fend - fstart) <= 20:
                        phrases.append((estart, eend, fstart, fend))

            for estart, eend, fstart, fend in phrases:
                phrase = extract(fstart, fend, estart, eend,
                                 wordAlignment, english, german)
                if phrase != 'NULL':
                    data.append(phrase[0] + '\t' + phrase[1])

    with open('phrases.txt', 'w') as f:
        f.write('\n'.join(data))
def extractPhrases(sourceToTarget, targetToSource):
    '''Extract unique phrase pairs from two alignment files into 'phrases.txt'.

    Both files are read in lockstep, three lines per sentence pair: the
    first line of each record is discarded and the following two lines are
    handed to ``findAlignment``. Reading stops as soon as the first file is
    exhausted. Every candidate span is passed to ``extract``; for results
    other than the ``'NULL'`` sentinel, both sides are stripped and joined
    with a tab. Blank and already-seen lines are dropped, keeping the order
    of first occurrence.

    Args:
        sourceToTarget: Path of the source-to-target alignment file.
        targetToSource: Path of the target-to-source alignment file.

    Returns:
        None. The collected lines are written, newline-separated, to
        'phrases.txt' in the current working directory.
    '''
    data = []
    # Mirrors `data` for O(1) duplicate checks; `data` alone keeps the order.
    seen = set()

    # Context managers guarantee both inputs are closed even when
    # findAlignment/extract raises part-way through the files.
    with open(sourceToTarget, 'r') as feg, open(targetToSource, 'r') as fge:
        count = 0
        while True:
            count += 1
            # Call form works on Python 2 and 3; the statement form
            # `print count` is a SyntaxError on Python 3.
            print(count)

            # First line of the record is only used to detect end of file.
            if feg.readline() == "":
                break
            sourceToTarget1 = feg.readline()
            sourceToTarget2 = feg.readline()

            fge.readline()  # matching first line of the other file, unused
            targetToSource1 = fge.readline()
            targetToSource2 = fge.readline()

            wordAlignment, source, target = findAlignment(
                sourceToTarget1, sourceToTarget2,
                targetToSource1, targetToSource2)
            lSource = len(source)
            lTarget = len(target)

            # Enumerate every source span [estart, eend] and the tightest
            # target span [fstart, fend] covering its aligned positions.
            phrases = []
            for estart in range(lSource):
                for eend in range(estart, lSource):
                    fstart = lTarget
                    fend = -1
                    for i in wordAlignment:
                        if estart <= i <= eend:
                            for j in wordAlignment[i]:
                                fstart = min(j, fstart)
                                fend = max(j, fend)
                    # NOTE(review): with `or`, a span passes when either side
                    # is within the limit; a span with no aligned positions
                    # has fend - fstart < 0 and therefore always passes.
                    # Presumably `extract` rejects those -- confirm.
                    if (eend - estart) <= 20 or (fend - fstart) <= 20:
                        phrases.append((estart, eend, fstart, fend))

            for estart, eend, fstart, fend in phrases:
                phrase = extract(fstart, fend, estart, eend,
                                 wordAlignment, source, target)
                if phrase == 'NULL':
                    continue
                tmp = phrase[0].strip() + '\t' + phrase[1].strip()
                # A lone tab (both sides empty) strips to "" and is skipped.
                if tmp.strip() != "" and tmp not in seen:
                    seen.add(tmp)
                    data.append(tmp)

    with open('phrases.txt', 'w') as f:
        f.write('\n'.join(data))
_ = cv2.circle(disp, (p[0] - rect[0], p[1] - rect[1]), r, (0, 0, v), cv2.FILLED) for i, p in enumerate(np.asarray(_2[:, :2] * 20, np.int32)): v = 128 + int(127 * (_2[i, 2] - mini) / (maxi - mini + 1)) r = 5 if i == 0 else 1 _ = cv2.circle(disp, (p[0] - rect[0], p[1] - rect[1]), r, (0, v, 0), cv2.FILLED) return disp disp = display(dst, src) cv2.imshow("disp", disp) cv2.waitKey(0) cv2.destroyAllWindows() R, t = findRigidTransform(dst, src) src1 = dst @ R.T + t disp = display(src1, src) cv2.imshow("disp", disp) cv2.waitKey(0) cv2.destroyAllWindows() R, t = findAlignment(dst, src) src2 = dst @ R.T + t disp = display(src2, src) cv2.imshow("disp", disp) cv2.waitKey(0) cv2.destroyAllWindows()
def extractPhrases(source_target, target_source):
    '''Extract phrase pairs from two alignment files into 'phrases.txt'.

    Both files are read in lockstep, three lines per sentence pair. The
    first line of each record in ``source_target`` only serves as the
    end-of-file check. The first line of each record in ``target_source``
    carries a number after its first ':'; records where that number is
    below 1e-18 are skipped (presumably an alignment score -- confirm).
    The remaining two lines from each file are handed to ``findAlignment``,
    whose results are printed. Every candidate span is passed to
    ``extract``; results that are not ``None`` are stored as
    ``"<phrase[0]>\\t<phrase[1]>"``.

    Args:
        source_target: Path of the source-to-target alignment file.
        target_source: Path of the target-to-source alignment file.

    Returns:
        None. The collected lines are written, newline-separated, to
        'phrases.txt' in the current working directory.

    Raises:
        IndexError: If a ``target_source`` record header has no ':' (this
            includes that file running out before ``source_target``).
        ValueError: If the text after the ':' is not a valid float.
    '''
    data = []

    # Context managers guarantee both inputs are closed even when parsing,
    # findAlignment or extract raises part-way through the files.
    with open(source_target, 'r') as src_align_file, \
            open(target_source, 'r') as trg_align_file:
        while True:
            # First line of the record is only used to detect end of file.
            if src_align_file.readline() == "":
                break
            target_txt = src_align_file.readline()
            source_align_idx = src_align_file.readline()

            header = trg_align_file.readline()
            source_txt = trg_align_file.readline()
            target_align_idx = trg_align_file.readline()

            # All three lines of both records are consumed before this check
            # so that skipping a record keeps the two files in step.
            if float(header.strip().split(':')[1].strip()) < 1e-18:
                continue

            word_alignment, source, target = findAlignment(
                target_txt, source_align_idx, source_txt, target_align_idx)
            print("Source Text:", source)
            print("Target Text:", target)
            print("Word Alignment:", word_alignment, end="\n\n")

            src_len = len(source)
            trg_len = len(target)

            # Enumerate every source span [src_start, src_end] and the
            # tightest target span covering its aligned positions.
            phrases = []
            for src_start in range(src_len):
                for src_end in range(src_start, src_len):
                    trg_start = trg_len
                    trg_end = -1
                    for i in word_alignment:
                        if src_start <= i <= src_end:
                            for j in word_alignment[i]:
                                trg_start = min(j, trg_start)
                                trg_end = max(j, trg_end)
                    # NOTE(review): with `or`, a span passes when either side
                    # is within the limit; a span with no aligned positions
                    # has trg_end - trg_start < 0 and therefore always
                    # passes. Presumably `extract` rejects those -- confirm.
                    if (src_end - src_start) <= 5 or (trg_end - trg_start) <= 5:
                        phrases.append((src_start, src_end, trg_start, trg_end))

            for src_start, src_end, trg_start, trg_end in phrases:
                phrase = extract(src_start, src_end, trg_start, trg_end,
                                 word_alignment, source, target)
                if phrase is not None:
                    data.append(phrase[0] + '\t' + phrase[1])

    with open('phrases.txt', 'w') as phrase_file:
        phrase_file.write('\n'.join(data))