def extractPhrases(normalToSimple, simpleToNormal):
    '''Extract phrase pairs from two alignment files into ``phrases.txt``.

    The two files are read in lock-step, three lines per record. The first
    line of each record is discarded; the remaining two lines from each file
    are passed to ``findAlignment``, which yields a word alignment (a mapping
    from a position in ``normal`` to positions in ``simple``) together with
    the two token sequences. Every candidate span is then handed to
    ``extract`` and each result other than ``'NULL'`` is stored as
    ``"<phrase[0]>\\t<phrase[1]>"``.

    Reading stops at the first empty read from ``normalToSimple``. The
    running record number is printed on every iteration, including the final
    one that detects end-of-file.

    Args:
        normalToSimple: Path of the normal-to-simple alignment file.
        simpleToNormal: Path of the simple-to-normal alignment file.

    Returns:
        None. The collected pairs are written to ``phrases.txt`` in the
        current working directory, one per line, without a trailing newline.
    '''
    data = []
    # Context managers guarantee both inputs are closed even when
    # findAlignment() or extract() raises part-way through the files.
    with open(normalToSimple, 'r') as fns, open(simpleToNormal, 'r') as fsn:
        count = 0
        while True:
            count += 1
            print(count)
            # First line of each record is only used to detect end-of-file.
            if fns.readline() == "":
                break
            normalToSimple1 = fns.readline()
            normalToSimple2 = fns.readline()
            fsn.readline()  # skip the matching first line in the other file
            simpleToNormal1 = fsn.readline()
            simpleToNormal2 = fsn.readline()
            wordAlignment, normal, simple = findAlignment(normalToSimple1,
                                                          normalToSimple2,
                                                          simpleToNormal1,
                                                          simpleToNormal2)

            lNormal = len(normal)
            lSimple = len(simple)

            # Enumerate every span [estart, eend] on the normal side and the
            # smallest span [fstart, fend] on the simple side that covers all
            # positions aligned to it.
            phrases = []
            for estart in range(lNormal):
                for eend in range(estart, lNormal):
                    # Sentinels: stay at (lSimple, -1) when nothing is aligned.
                    fstart = lSimple
                    fend = -1
                    for i in wordAlignment:
                        if estart <= i <= eend:
                            for j in wordAlignment[i]:
                                fstart = min(j, fstart)
                                fend = max(j, fend)
                    # NOTE(review): with `or`, a span is kept when EITHER side
                    # is at most 20 wide, and unaligned spans (negative
                    # fend - fstart) always pass. Confirm `and` was not
                    # intended; behaviour is kept as-is here.
                    if (eend - estart) <= 20 or (fend - fstart) <= 20:
                        phrases.append((estart, eend, fstart, fend))

            for estart, eend, fstart, fend in phrases:
                phrase = extract(fstart, fend, estart, eend, wordAlignment,
                                 normal, simple)
                # extract() signals "no phrase" with the string 'NULL'.
                if phrase != 'NULL':
                    data.append(phrase[0] + '\t' + phrase[1])

    with open('phrases.txt', 'w') as f:
        f.write('\n'.join(data))
# Example #2
# 0
def extractPhrases(englishToGerman, germanToEnglish):
    '''Extract phrase pairs from two alignment files into ``phrases.txt``.

    The two files are read in lock-step, three lines per record. The first
    line of each record is discarded; the remaining two lines from each file
    are passed to ``findAlignment``, which yields a word alignment (a mapping
    from a position in ``english`` to positions in ``german``) together with
    the two token sequences. Every candidate span is then handed to
    ``extract`` and each result other than ``'NULL'`` is stored as
    ``"<phrase[0]>\\t<phrase[1]>"``.

    Reading stops at the first empty read from ``englishToGerman``. The
    running record number is printed on every iteration, including the final
    one that detects end-of-file.

    Args:
        englishToGerman: Path of the English-to-German alignment file.
        germanToEnglish: Path of the German-to-English alignment file.

    Returns:
        None. The collected pairs are written to ``phrases.txt`` in the
        current working directory, one per line, without a trailing newline.
    '''
    data = []
    # Context managers guarantee both inputs are closed even when
    # findAlignment() or extract() raises part-way through the files.
    with open(englishToGerman, 'r') as feg, open(germanToEnglish, 'r') as fge:
        count = 0
        while True:
            count += 1
            # Parenthesised form prints the same text on Python 2 and is
            # valid syntax on Python 3.
            print(count)
            # First line of each record is only used to detect end-of-file.
            if feg.readline() == "":
                break
            englishToGerman1 = feg.readline()
            englishToGerman2 = feg.readline()
            fge.readline()  # skip the matching first line in the other file
            germanToEnglish1 = fge.readline()
            germanToEnglish2 = fge.readline()

            wordAlignment, english, german = findAlignment(englishToGerman1,
                                                           englishToGerman2,
                                                           germanToEnglish1,
                                                           germanToEnglish2)

            lEnglish = len(english)
            lGerman = len(german)

            # Enumerate every span [estart, eend] on the English side and the
            # smallest span [fstart, fend] on the German side that covers all
            # positions aligned to it.
            phrases = []
            for estart in range(lEnglish):
                for eend in range(estart, lEnglish):
                    # Sentinels: stay at (lGerman, -1) when nothing is aligned.
                    fstart = lGerman
                    fend = -1
                    for i in wordAlignment:
                        if estart <= i <= eend:
                            for j in wordAlignment[i]:
                                fstart = min(j, fstart)
                                fend = max(j, fend)
                    # NOTE(review): with `or`, a span is kept when EITHER side
                    # is at most 20 wide, and unaligned spans (negative
                    # fend - fstart) always pass. Confirm `and` was not
                    # intended; behaviour is kept as-is here.
                    if (eend - estart) <= 20 or (fend - fstart) <= 20:
                        phrases.append((estart, eend, fstart, fend))

            for estart, eend, fstart, fend in phrases:
                phrase = extract(fstart, fend, estart, eend, wordAlignment,
                                 english, german)
                # extract() signals "no phrase" with the string 'NULL'.
                if phrase != 'NULL':
                    data.append(phrase[0] + '\t' + phrase[1])

    with open('phrases.txt', 'w') as f:
        f.write('\n'.join(data))
def extractPhrases(sourceToTarget, targetToSource):
    '''Extract unique phrase pairs from two alignment files into ``phrases.txt``.

    The two files are read in lock-step, three lines per record. The first
    line of each record is discarded; the remaining two lines from each file
    are passed to ``findAlignment``, which yields a word alignment (a mapping
    from a position in ``source`` to positions in ``target``) together with
    the two token sequences. Every candidate span is then handed to
    ``extract``; each result other than ``'NULL'`` is stripped on both sides
    and stored as ``"<source>\\t<target>"``. Pairs whose two sides are both
    empty, and pairs already collected, are dropped; first-seen order is
    preserved.

    Reading stops at the first empty read from ``sourceToTarget``. The
    running record number is printed on every iteration, including the final
    one that detects end-of-file.

    Args:
        sourceToTarget: Path of the source-to-target alignment file.
        targetToSource: Path of the target-to-source alignment file.

    Returns:
        None. The collected pairs are written to ``phrases.txt`` in the
        current working directory, one per line, without a trailing newline.
    '''
    data = []
    # Mirrors the contents of `data` so duplicate detection is a hash lookup
    # instead of a linear scan of an ever-growing list.
    seen = set()
    # Context managers guarantee both inputs are closed even when
    # findAlignment() or extract() raises part-way through the files.
    with open(sourceToTarget, 'r') as feg, open(targetToSource, 'r') as fge:
        count = 0
        while True:
            count += 1
            # Parenthesised form prints the same text on Python 2 and is
            # valid syntax on Python 3.
            print(count)
            # First line of each record is only used to detect end-of-file.
            if feg.readline() == "":
                break
            sourceToTarget1 = feg.readline()
            sourceToTarget2 = feg.readline()
            fge.readline()  # skip the matching first line in the other file
            targetToSource1 = fge.readline()
            targetToSource2 = fge.readline()

            wordAlignment, source, target = findAlignment(sourceToTarget1,
                                                          sourceToTarget2,
                                                          targetToSource1,
                                                          targetToSource2)

            lSource = len(source)
            lTarget = len(target)

            # Enumerate every span [estart, eend] on the source side and the
            # smallest span [fstart, fend] on the target side that covers all
            # positions aligned to it.
            phrases = []
            for estart in range(lSource):
                for eend in range(estart, lSource):
                    # Sentinels: stay at (lTarget, -1) when nothing is aligned.
                    fstart = lTarget
                    fend = -1
                    for i in wordAlignment:
                        if estart <= i <= eend:
                            for j in wordAlignment[i]:
                                fstart = min(j, fstart)
                                fend = max(j, fend)
                    # NOTE(review): with `or`, a span is kept when EITHER side
                    # is at most 20 wide, and unaligned spans (negative
                    # fend - fstart) always pass. Confirm `and` was not
                    # intended; behaviour is kept as-is here.
                    if (eend - estart) <= 20 or (fend - fstart) <= 20:
                        phrases.append((estart, eend, fstart, fend))

            for estart, eend, fstart, fend in phrases:
                phrase = extract(fstart, fend, estart, eend, wordAlignment,
                                 source, target)
                # extract() signals "no phrase" with the string 'NULL'.
                if phrase == 'NULL':
                    continue
                tmp = phrase[0].strip() + '\t' + phrase[1].strip()
                # tmp.strip() is empty only when both sides are empty.
                if tmp.strip() != "" and tmp not in seen:
                    seen.add(tmp)
                    data.append(tmp)

    with open('phrases.txt', 'w') as f:
        f.write('\n'.join(data))
        _ = cv2.circle(disp, (p[0] - rect[0], p[1] - rect[1]), r, (0, 0, v),
                       cv2.FILLED)
    for i, p in enumerate(np.asarray(_2[:, :2] * 20, np.int32)):
        v = 128 + int(127 * (_2[i, 2] - mini) / (maxi - mini + 1))
        r = 5 if i == 0 else 1
        _ = cv2.circle(disp, (p[0] - rect[0], p[1] - rect[1]), r, (0, v, 0),
                       cv2.FILLED)
    return disp


# Visual comparison script: shows `dst` against `src` three times -- as-is,
# after the transform from findRigidTransform, and after the transform from
# findAlignment. `dst`, `src`, `findRigidTransform` and `findAlignment` are
# defined outside this section.

# 1) Untransformed inputs. waitKey(0) blocks until a key is pressed.
disp = display(dst, src)
cv2.imshow("disp", disp)
cv2.waitKey(0)
cv2.destroyAllWindows()

# 2) Apply the (R, t) pair returned by findRigidTransform to every row of dst.
# NOTE(review): presumably R is a rotation matrix and t a translation -- confirm.
R, t = findRigidTransform(dst, src)
src1 = dst @ R.T + t

disp = display(src1, src)
cv2.imshow("disp", disp)
cv2.waitKey(0)
cv2.destroyAllWindows()

# 3) Same comparison using the (R, t) pair returned by findAlignment; R and t
# are rebound here, so the values from step 2 are no longer available.
R, t = findAlignment(dst, src)
src2 = dst @ R.T + t

disp = display(src2, src)
cv2.imshow("disp", disp)
cv2.waitKey(0)
cv2.destroyAllWindows()
def extractPhrases(source_target, target_source):
    '''Extract phrase pairs from two alignment files into ``phrases.txt``.

    The two files are read in lock-step, three lines per record. For each
    record the first line of ``target_source`` is parsed for a number after
    its first ``':'``; records where that number is below ``1e-18`` are
    skipped. (NOTE(review): presumably this is an alignment score -- confirm
    against the file format.) The remaining two lines from each file are
    passed to ``findAlignment``, which yields a word alignment (a mapping
    from a position in ``source`` to positions in ``target``) together with
    the two token sequences; all three are printed. Every candidate span is
    then handed to ``extract`` and each non-``None`` result is stored as
    ``"<phrase[0]>\\t<phrase[1]>"``.

    Reading stops at the first empty read from ``source_target``.

    Args:
        source_target: Path of the source-to-target alignment file.
        target_source: Path of the target-to-source alignment file.

    Returns:
        None. The collected pairs are written to ``phrases.txt`` in the
        current working directory, one per line, without a trailing newline.

    Raises:
        IndexError: If a record header in ``target_source`` contains no ``':'``.
        ValueError: If the text after the ``':'`` is not a valid float.
    '''
    data = []
    # Context managers guarantee both inputs are closed even when header
    # parsing, findAlignment() or extract() raises part-way through.
    with open(source_target, 'r') as src_align_file, \
            open(target_source, 'r') as trg_align_file:
        while True:
            # First line of the source-side record only detects end-of-file.
            if src_align_file.readline() == "":
                break
            target_txt = src_align_file.readline()
            source_align_idx = src_align_file.readline()

            header = trg_align_file.readline()
            source_txt = trg_align_file.readline()
            target_align_idx = trg_align_file.readline()

            # Skip records whose header value is effectively zero. All three
            # lines of both records have already been consumed at this point,
            # so the files stay in step.
            if float(header.strip().split(':')[1].strip()) < 1e-18:
                continue

            word_alignment, source, target = findAlignment(
                target_txt, source_align_idx, source_txt, target_align_idx)
            print("Source Text:", source)
            print("Target Text:", target)
            print("Word Alignment:", word_alignment, end = "\n\n")

            src_len = len(source)
            trg_len = len(target)

            # Enumerate every span [src_start, src_end] on the source side and
            # the smallest span [trg_start, trg_end] on the target side that
            # covers all positions aligned to it.
            phrases = []
            for src_start in range(src_len):
                for src_end in range(src_start, src_len):
                    # Sentinels: stay at (trg_len, -1) when nothing is aligned.
                    trg_start = trg_len
                    trg_end = -1
                    for i in word_alignment:
                        if src_start <= i <= src_end:
                            for j in word_alignment[i]:
                                trg_start = min(j, trg_start)
                                trg_end = max(j, trg_end)
                    # NOTE(review): with `or`, a span is kept when EITHER side
                    # is at most 5 wide, and unaligned spans (negative
                    # trg_end - trg_start) always pass. Confirm `and` was not
                    # intended; behaviour is kept as-is here.
                    if (src_end - src_start) <= 5 or (trg_end - trg_start) <= 5:
                        phrases.append((src_start, src_end, trg_start, trg_end))

            for src_start, src_end, trg_start, trg_end in phrases:
                phrase = extract(src_start, src_end, trg_start, trg_end,
                                 word_alignment, source, target)
                # extract() signals "no phrase" with None.
                if phrase is not None:
                    data.append(phrase[0] + '\t' + phrase[1])

    with open('phrases.txt', 'w') as phrase_file:
        phrase_file.write('\n'.join(data))