Python reduceTashkeel Examples

Programming Language: Python

Namespace/Package Name: pyarabic.araby

Method/Function: reduceTashkeel

Examples at hotexamples.com: 7

Python reduceTashkeel - 7 examples found. These are the top-rated real-world Python examples of pyarabic.araby.reduceTashkeel, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def reducedTashkeelText(text):
    """
    Return a vocalized text with its vocalization (harakat) reduced.

    Thin wrapper: the work is delegated to araby.reduceTashkeel.
    @param text: the vocalized text to reduce.
    @type text: unicode.
    @return: the value returned by araby.reduceTashkeel for this text.
    @rtype: unicode
    """
    reduced = araby.reduceTashkeel(text)
    return reduced

Example #2

Show file

File: adaat.py Project: ATouhou/mishkal

def reducedTashkeelText(text):
	"""
	Return a vocalized text with its vocalization (harakat) reduced.

	Thin wrapper: the work is delegated to araby.reduceTashkeel.
	@param text: the vocalized text to reduce.
	@type text: unicode.
	@return: the value returned by araby.reduceTashkeel for this text.
	@rtype: unicode
	"""
	reduced = araby.reduceTashkeel(text)
	return reduced

Example #3

Show file

def test():
    """
    Command-line driver: vocalize, strip or reduce the tashkeel of a text.

    Options are read from the dict returned by grabargs(). The input is either
    the 'text' option (split on newlines) or the file named by 'fname'. In file
    mode every result line is printed and also written to an output file
    ('ofname', or the input name followed by " (Tashkeel).txt").

    Per line, one of the following is applied:
      - 'strip_tashkeel': araby.strip_tashkeel(line);
      - 'compare': the line is taken as a vocalized reference, stripped,
        re-vocalized and scored word by word; a statistics row is printed;
      - otherwise: vocalizer.tashkeel(line).
    With 'reducedTashkeel' the result is passed through araby.reduceTashkeel.

    Lines starting with '# ' are skipped and do not count against 'limit'.
    Processing stops at the first empty line (end of file, or an empty line
    inside 'text').

    This block is Python 2 code: lines read from the file are byte strings
    that are decoded from UTF-8, and results are encoded before output.
    """
    options = grabargs()

    filename = options['fname']
    outfilename = options['ofname']
    text = options['text']
    strip_tashkeel = options['strip_tashkeel']
    nocache = options['nocache']
    reducedTashkeel = options['reducedTashkeel']
    disableSyntax = options['disableSyntax']
    disableSemantic = options['disableSemantic']
    disableStat = options['disableStatistic']
    ignore = options['ignore']
    limit = options['limit']
    compare = options['compare']
    progress = options['progress']
    enable_syn_train = options['train']

    if not text and not filename:
        usage()
        sys.exit(0)

    if not text:
        try:
            myfile = open(filename)
            # single formatted argument: prints the same text on Python 2
            # and 3 instead of a tuple repr on Python 2
            print("input file: %s" % filename)
            if not outfilename:
                outfilename = filename + " (Tashkeel).txt"
            print("output file: %s" % outfilename)
            outfile = open(outfilename, "w")
        except IOError:
            print(" Can't Open the given File %s" % filename)
            sys.exit()
    else:
        lines = text.split('\n')
    # all things are well, import library
    import core.adaat
    import pyarabic.araby as araby

    def _percent(part, whole):
        # Share of `part` in `whole`, rounded to 2 digits; 0.0 while nothing
        # has been counted, so the statistics never divide by zero.
        return round(part * 100.00 / whole, 2) if whole else 0.0

    counter = 1  # 1-based count of processed (non-comment) lines
    if not limit:
        limit = 100000000
    if not strip_tashkeel:
        vocalizer = ArabicVocalizer.TashkeelClass()
        if nocache:
            vocalizer.disable_cache()
        if ignore:
            vocalizer.disable_last_mark()
        if disableSemantic:
            vocalizer.disable_semantic_analysis()
        if disableSyntax:
            vocalizer.disable_syntaxic_analysis()
        if disableStat:
            vocalizer.disable_stat_tashkeel()
        if enable_syn_train:
            vocalizer.enable_syn_train()

    nolimit = False
    # Position in `lines` (text mode only). It is tracked separately from
    # `counter`: `counter` starts at 1 and is not advanced on comment lines,
    # so indexing `lines` with it skipped the second line and looped forever
    # on a comment line.
    text_index = 0
    if not text:
        line = (myfile.readline()).decode('utf8')
    else:
        # str.split always returns at least one element
        line = lines[text_index]
    correct = 0
    incorrect = 0
    total = 0
    LettersError = 0
    WLMIncorrect = 0
    if compare:
        # header of the per-line statistics rows
        print(
            "id\tfully Correct\tStrip Correct\tfully WER\tStrip WER\tLER\tTotal\tline Fully correct\tline Strip correct\tLine"
        )

    while line and (nolimit or counter <= limit):
        if not line.startswith('# '):
            line = line.strip()
            lineCorrect = 0
            lineWLMIncorrect = 0
            # reset per line so the stats below never read an unbound or
            # stale value (e.g. when stripping and comparing together)
            lineTotal = 0
            if strip_tashkeel:
                result = araby.strip_tashkeel(line)
            elif not compare:
                # vocalize line by line
                result = vocalizer.tashkeel(line)
            else:
                # the input line is the vocalized reference
                inputlist = vocalizer.analyzer.tokenize(line)
                inputUnvocalizedLine = araby.strip_tashkeel(line)
                vocalized_dict = vocalizer.tashkeel_ouput_html_suggest(
                    inputUnvocalizedLine)

                outputlist = [x.get("chosen", '') for x in vocalized_dict]
                result = u" ".join(outputlist)
                outputlistsemi = [x.get("semi", '') for x in vocalized_dict]
                total += len(inputlist)
                lineTotal = len(inputlist)
                if len(inputlist) != len(outputlist):
                    print("lists haven't the same length")
                    print("%d %d" % (len(inputlist), len(outputlist)))
                    print(u"# ".join(inputlist).encode('utf8'))
                    print(u"# ".join(outputlist).encode('utf8'))
                else:
                    for inword, outword, outsemiword in zip(
                            inputlist, outputlist, outputlistsemi):
                        simi = araby.vocalized_similarity(inword, outword)
                        if simi < 0:
                            LettersError += -simi
                            incorrect += 1
                            # second chance: compare with the "semi" form
                            # (evaluation without last haraka)
                            simi2 = araby.vocalized_similarity(
                                inword, outsemiword)
                            if simi2 < 0:
                                WLMIncorrect += 1
                                lineWLMIncorrect += 1
                        else:
                            correct += 1
                            lineCorrect += 1

            if reducedTashkeel:
                result = araby.reduceTashkeel(result)
            counter += 1

            # display stat for every line
            if compare:
                print("%d\t%0.2f%%\t%0.2f%%\t%d\t%d\t%d\t%d\t" % (
                    counter - 1,  # id
                    _percent(correct, total),  # fully Correct
                    _percent(total - WLMIncorrect, total),  # Strip Correct
                    incorrect,  # fully WER
                    WLMIncorrect,  # Strip WER
                    LettersError,  # LER
                    total  # Total
                ))
                if lineTotal:
                    print("%0.2f%%\t" %
                          _percent(lineCorrect, lineTotal)
                          )  # line Fully correct
                    print("%0.2f%%\t" %
                          _percent(lineTotal - lineWLMIncorrect, lineTotal)
                          )  # line Strip correct

            if text:
                # trailing comma: Python 2 print without a newline
                print(result.strip('\n').encode('utf8')),
            else:
                result_line = result.encode('utf8')
                print(result_line)
                # add line and new line to output file
                outfile.write(result_line)
                outfile.write("\n")

        if progress and not nolimit:
            sys.stderr.write(
                "\r[%d%%]%d/%d lines    Full %0.2f Strip %0.2f     " % (
                    counter * 100 / limit,
                    counter,
                    limit,
                    _percent(correct, total),  # fully Correct
                    _percent(total - WLMIncorrect, total)  # Strip Correct
                ))
            sys.stderr.flush()

        # get the next line
        if not text:
            line = (myfile.readline()).decode('utf8')
        else:
            text_index += 1
            line = lines[text_index] if text_index < len(lines) else None

    print("Done")

    if not text:
        # release the handles opened above
        myfile.close()
        outfile.close()

Example #4

Show file

def test():
    """
    Command-line driver: vocalize, strip, reduce, compare or evaluate tashkeel.

    Options are read from the namespace returned by grabargs(). The input is
    either args.text (stripped, then split on newlines) or the file
    args.filename, read as UTF-8. In file mode each result line is written to
    args.outfile (default: the input name followed by ".Tashkeel.txt") and
    printed only when args.verbose is set.

    Per line, the first matching mode is applied:
      - command "strip": araby.strip_tashkeel(line);
      - args.compareto given: the line is compared with the same-numbered line
        of that file through myconsole.compare, and the line is echoed;
      - args.evaluation: the line is stripped, re-vocalized and scored through
        myconsole.compare;
      - otherwise: vocalizer.tashkeel(line).
    With command "reduce" the result is passed through araby.reduceTashkeel.

    Processing stops at the first empty line (end of file, or an empty line
    inside the text) or once myconsole.counter exceeds the limit.
    """
    args = grabargs()

    filename = args.filename
    filename2 = args.compareto  # used for comparison
    compare = bool(filename2)
    outfilename = args.outfile
    text = args.text
    if not text and not filename:
        print('Try: mishkal-console.py -h')
        sys.exit(0)
    # tashkeel command: any command other than these two means "vocalize"
    command = args.command
    strip_tashkeel = command == "strip"
    reducedTashkeel = command == "reduce"
    # general options
    limit = args.limit
    progress = args.progress
    verbose = args.verbose

    # options
    ignore = args.ignore
    cache = args.cache
    disableSyntax = args.syntax
    disableSemantic = args.semantic
    disableStat = args.stat
    enable_syn_train = args.train
    evaluation = args.evaluation

    # Open file
    if not text:
        try:
            myfile = open(filename, encoding='utf8')
            print("input file:", filename)
            if not outfilename:
                outfilename = filename + ".Tashkeel.txt"
            print("output file:", outfilename)
            # same encoding as the input, so writing Arabic text does not
            # depend on the platform's default encoding
            outfile = open(outfilename, "w", encoding='utf8')
        except OSError:
            print(" Can't Open the given File ", filename)
            sys.exit()
    else:
        lines = text.strip().split('\n')
    if compare:
        try:
            myfile2 = open(filename2, encoding='utf8')
            print("input file2:", filename2)
        except OSError:
            print(" Can't Open the given File ", filename2)
            sys.exit()

    # all things are well, import library

    myconsole = tashkeel_console.Tashkeel_console()
    # NOTE(review): this stores the raw option, before the default below is
    # computed -- confirm whether Tashkeel_console expects the effective limit.
    myconsole.limit = limit
    if not limit:
        # count lines in files if filename, otherwise count lines in text
        if filename:
            with open(filename, encoding='utf8') as f:
                limit = sum(1 for _ in f)
        else:
            limit = len(lines)
    if not strip_tashkeel:
        vocalizer = ArabicVocalizer.TashkeelClass()
        if cache:
            vocalizer.enable_cache()
            sys.stderr.write(" Mishkal use a cache")
        if ignore:
            vocalizer.disable_last_mark()
        if disableSemantic:
            vocalizer.disable_semantic_analysis()
        if disableSyntax:
            vocalizer.disable_syntaxic_analysis()
        if disableStat:
            vocalizer.disable_stat_tashkeel()
        if enable_syn_train:
            vocalizer.enable_syn_train()
        # if verbose option, then activate logger in ArabicVocalizer
        if verbose:
            vocalizer.enable_verbose()

    # Position in `lines` (text mode only). It is tracked here rather than
    # derived from myconsole.counter, whose starting value is set inside
    # Tashkeel_console, so no text line is skipped or read twice.
    text_index = 0
    if not text:
        line = myfile.readline()
    else:
        # str.split always returns at least one element
        line = lines[text_index]
    # get the first line to compare
    if compare:
        line_base = myfile2.readline().strip()
    if evaluation:
        myconsole.header()

    while line and myconsole.counter <= limit:
        line = line.strip()
        if strip_tashkeel:
            result = araby.strip_tashkeel(line)
        elif compare:
            myconsole.compare(line_base, line)
            myconsole.display_line_stat()
            result = line
            print("base :", line_base)
            print("input:", line)
        elif not evaluation:
            # vocalize line by line
            result = vocalizer.tashkeel(line)
            myconsole.total += len(araby.tokenize(line))
        else:
            # evaluation: the input line is the vocalized reference
            inputUnvocalizedLine = araby.strip_tashkeel(line)
            vocalized_dict = vocalizer.tashkeel_ouput_html_suggest(
                inputUnvocalizedLine)
            outputlist = [x.get("chosen", '') for x in vocalized_dict]
            result = u" ".join(outputlist)
            myconsole.compare(line, vocalized_dict)
            # display stat for every line
            myconsole.display_line_stat()

        if reducedTashkeel:
            result = araby.reduceTashkeel(result)

        if text:
            print(result.strip('\n'), end='')
        else:
            result_line = result
            if verbose:
                print(result_line)
            # add line and new line to output file
            outfile.write(result_line)
            outfile.write("\n")

        if progress:
            # show progress bar
            myconsole.progress(compare)

        myconsole.counter += 1
        # get the next line
        if not text:
            line = myfile.readline()
        else:
            text_index += 1
            line = lines[text_index] if text_index < len(lines) else None
        # get the next line to compare
        if compare:
            line_base = myfile2.readline().strip()

    if progress:
        myconsole.footer()

    # release the handles opened above
    if not text:
        myfile.close()
        outfile.close()
    if compare:
        myfile2.close()

Example #5

Show file

File: mishkal-console.py Project: ATouhou/mishkal

def test():
	filename, text,  stripTashkeel, reducedTashkeel, disableSyntax, disableSemantic, disableStat, ignore, limit, compare =grabargs()
	#filename="samples/randomtext.txt"	
	if not text and not filename:
		usage()
		sys.exit(0)
		
	if not text:
		try:
			myfile=open(filename)
		except:
			print " Can't Open the given File ", filename;
			sys.exit();
	else:
		lines = text.split('\n');
	# all things are well, import library
	import core.adaat 
	import pyarabic.araby as araby

	counter=1;
	if not limit : 
		limit=	100000000
	if not stripTashkeel: 
		vocalizer=ArabicVocalizer.TashkeelClass();
		if ignore : 
			vocalizer.disableLastMark();
		if disableSemantic:
			vocalizer.disableSemanticAnalysis();
		if disableSyntax:
			vocalizer.disableSyntaxicAnalysis();
		if disableStat:
			vocalizer.disableStatTashkeel();

	#vocalizer.disableShowCollocationMark();
	#print "show delimiter", vocalizer.collo.showDelimiter;
	#nolimit = True;
	nolimit = False;
	if not text:
		line=(myfile.readline()).decode('utf8');
	else:
		if len(lines)>0:
			line= lines[0];
	correct=0;
	incorrect=0;
	total=0;
	totLetters =0;
	LettersError =0
	WLMIncorrect =0;
	if compare:
		#dispaly stats for the current line
		print "id\tfully Correct\tStrip Correct\tfully WER\tStrip WER\tLER\tTotal\tline Fully correct\tline Strip correct"
		
		# print "Full\tPartial\tFull correct \tfull incorrect\tpartial correct\tpartial incorrect\tWER\tLER\tTotal"
	
	while line and (nolimit or counter<=limit):
		if not line.startswith('#'):
			# lineIncorrect = 0;
			lineCorrect   = 0;
			lineWLMIncorrect =0;
			if stripTashkeel:
				result = araby.stripTashkeel(line);
			else:	#vocalize line by line
				if compare:
					vocalizedLine = line;
					line = araby.stripTashkeel(line)
				result=vocalizer.tashkeel(line);
				#compare resultLine and vocalizedLine
				if compare:
					list1=vocalizer.analyzer.tokenize(vocalizedLine);
					list2=vocalizer.analyzer.tokenize(result);
					#print u":".join(list1).encode('utf8');
					#print u":".join(list2).encode('utf8');
					total+=len(list1);
					lineTotal = len(list1);
					if len(list1)!=len(list2):
						print "lists haven't the same length";
					else:
						for i in range(len(list1)):
							simi = araby.vocalizedSimilarity(list1[i],list2[i]);
							if simi<0:
								LettersError+= -simi;
								incorrect   +=1;
								# lineIncorrect += 1;
								# evaluation without last haraka
								simi2 = araby.vocalizedSimilarity(araby.stripLastHaraka(list1[i]),araby.stripLastHaraka(list2[i]));
								if simi2<0: 
									WLMIncorrect    +=1;
									lineWLMIncorrect+=1;								

							else:
								correct+=1;
								lineCorrect += 1;
					
			#compare resultLine and vocalizedLine
			if reducedTashkeel:
				result= araby.reduceTashkeel(result)
			# print result.encode('utf8');
			counter+=1;

			#display stat for every line
			if compare:
				print "%d\t%0.2f%%\t%0.2f%%\t%d\t%d\t%d\t%d\t"%(
						counter-1,#id
						round(correct*100.00/total,2),#fully Correct
						round((total-WLMIncorrect)*100.00/total,2),#Strip Correct
						incorrect,#fully WER
						WLMIncorrect,#Strip WER
						LettersError,#LER
						total,#Total
						),
				if lineTotal:
					print "%0.2f%%\t"%round(lineCorrect*100.00/lineTotal,2),#line Fully correct
					print "%0.2f%%\t"%round((lineTotal-lineWLMIncorrect)*100.00/lineTotal,2),#line Strip correct
						
			print result.encode('utf8');
		#get the next line
		if not text:
			line=(myfile.readline()).decode('utf8');
		else:
			if counter<len(lines):
				line= lines[counter];
			else:
				line =None;

Example #6

Show file

File: mishkal-console.py Project: tazjel/mishkal

def test():
    options = grabargs()

    filename = options['fname']
    text     = options['text']
    strip_tashkeel  = options['strip_tashkeel']
    nocache         = options['nocache']
    reducedTashkeel = options['reducedTashkeel']
    disableSyntax   = options['disableSyntax']
    disableSemantic = options['disableSemantic']
    disableStat     = options['disableStatistic']
    ignore = options['ignore']
    limit  = options['limit']
    compare = options['compare']
    progress = options['progress']
        
    #filename = "samples/randomtext.txt"    
    if not text and not filename:
        usage()
        sys.exit(0)
        
    if not text:
        try:
            myfile = open(filename)
        except:
            print " Can't Open the given File ", filename
            sys.exit()
    else:
        lines = text.split('\n')
    # all things are well, import library
    import core.adaat 
    import pyarabic.araby as araby

    counter = 1
    if not limit : 
        limit = 100000000
    if not strip_tashkeel: 
        vocalizer = ArabicVocalizer.TashkeelClass()
        if nocache : 
            vocalizer.disable_cache()
            print "nocache"
        if ignore : 
            vocalizer.disable_last_mark()
        if disableSemantic:
            vocalizer.disable_semantic_analysis()
        if disableSyntax:
            vocalizer.disable_syntaxic_analysis()
        if disableStat:
            vocalizer.disable_stat_tashkeel()

    #vocalizer.disableShowCollocationMark()
    #print "show delimiter", vocalizer.collo.showDelimiter
    #nolimit = True
    nolimit = False
    if not text:
        line = (myfile.readline()).decode('utf8')
    else:
        if len(lines)>0:
            line = lines[0]
    correct = 0
    incorrect = 0
    total = 0
    totLetters = 0
    LettersError = 0
    WLMIncorrect = 0
    percent = 0
    if compare:
        #dispaly stats for the current line
        print "id\tfully Correct\tStrip Correct\tfully WER\tStrip WER\tLER\tTotal\tline Fully correct\tline Strip correct"
        
    while line and (nolimit or counter <= limit):
        if progress and not nolimit:
            #~percent = (counter * 100/ limit ) if (counter / limit * 100 >percent) else percent
            sys.stderr.write("\r[%d%%]%d/%d lines" %(counter * 100/ limit, counter, limit))
            #~sys.stderr.write("treatment of "+line.encode('utf8'))
            sys.stderr.flush()
        if not line.startswith('#'):
            line = line.strip()
            lineCorrect = 0
            lineWLMIncorrect = 0
            if strip_tashkeel:
                result = araby.strip_tashkeel(line)
            else:    #vocalize line by line
                if not compare:
                    result = vocalizer.tashkeel(line)                    
                if compare:
                    inputVocalizedLine = line
                    inputlist = vocalizer.analyzer.tokenize(inputVocalizedLine)
                    inputUnvocalizedLine = araby.strip_tashkeel(line)
                    vocalized_dict = vocalizer.tashkeel_ouput_html_suggest(inputUnvocalizedLine)


                    #stemmer=tashaphyne.stemming.ArabicLightStemmer()
                    #~texts = vocalizer.analyzer.split_into_phrases(inputVocalizedLine)
                    #~inputlist =[]
                    #~for txt in texts:
                        #~inputlist += vocalizer.analyzer.text_tokenize(txt)
                    outputlist = [x.get("chosen",'') for x in vocalized_dict]
                    result = u" ".join(outputlist)
                    outputlistsemi = [x.get("semi",'') for x in vocalized_dict]
                    total += len(inputlist)
                    lineTotal = len(inputlist)
                    if len(inputlist) != len(outputlist):
                        print "lists haven't the same length"
                        print len(inputlist), len(outputlist)
                        print u"#".join(inputlist).encode('utf8')
                        print u"#".join(outputlist).encode('utf8')
                    else:
                        for inword, outword, outsemiword in zip(inputlist, outputlist, outputlistsemi):
                            simi = araby.vocalized_similarity(inword, outword)
                            if simi<0:
                                LettersError += -simi
                                incorrect    += 1
                                # evaluation without last haraka
                                simi2 = araby.vocalized_similarity(inword, outsemiword)
                                if simi2<0: 
                                    WLMIncorrect     += 1
                                    lineWLMIncorrect += 1                                
                            else:
                                correct += 1
                                lineCorrect  += 1
                    
            #compare resultLine and vocalizedLine
            if reducedTashkeel:
                result = araby.reduceTashkeel(result)
            # print result.encode('utf8')
            counter += 1

            #display stat for every line
            if compare:
                print "%d\t%0.2f%%\t%0.2f%%\t%d\t%d\t%d\t%d\t"%(
                        counter-1, #id
                        round(correct*100.00/total, 2), #fully Correct
                        round((total-WLMIncorrect)*100.00/total, 2), #Strip Correct
                        incorrect, #fully WER
                        WLMIncorrect, #Strip WER
                        LettersError, #LER
                        total, #Total
                        ), 
                if lineTotal:
                    print "%0.2f%%\t"%round(lineCorrect*100.00/lineTotal, 2), #line Fully correct
                    print "%0.2f%%\t"%round((lineTotal-lineWLMIncorrect)*100.00/lineTotal, 2), #line Strip correct
                        
            print result.encode('utf8')
        #get the next line
        if not text:
            line = (myfile.readline()).decode('utf8')
        else:
            if counter<len(lines):
                line = lines[counter]
            else:
                line = None

Example #7

Show file

def test():
    filename, text, stripTashkeel, reducedTashkeel, disableSyntax, disableSemantic, disableStat, ignore, limit, compare = grabargs(
    )
    #filename="samples/randomtext.txt"
    if not text and not filename:
        usage()
        sys.exit(0)

    if not text:
        try:
            myfile = open(filename)
        except:
            print " Can't Open the given File ", filename
            sys.exit()
    else:
        lines = text.split('\n')
    # all things are well, import library
    import core.adaat
    import pyarabic.araby as araby

    counter = 1
    if not limit:
        limit = 100000000
    if not stripTashkeel:
        vocalizer = ArabicVocalizer.TashkeelClass()
        if ignore:
            vocalizer.disableLastMark()
        if disableSemantic:
            vocalizer.disableSemanticAnalysis()
        if disableSyntax:
            vocalizer.disableSyntaxicAnalysis()
        if disableStat:
            vocalizer.disableStatTashkeel()

    #vocalizer.disableShowCollocationMark();
    #print "show delimiter", vocalizer.collo.showDelimiter;
    #nolimit = True;
    nolimit = False
    if not text:
        line = (myfile.readline()).decode('utf8')
    else:
        if len(lines) > 0:
            line = lines[0]
    correct = 0
    incorrect = 0
    total = 0
    totLetters = 0
    LettersError = 0
    WLMIncorrect = 0
    if compare:
        #dispaly stats for the current line
        print "id\tfully Correct\tStrip Correct\tfully WER\tStrip WER\tLER\tTotal\tline Fully correct\tline Strip correct"

        # print "Full\tPartial\tFull correct \tfull incorrect\tpartial correct\tpartial incorrect\tWER\tLER\tTotal"

    while line and (nolimit or counter <= limit):
        if not line.startswith('#'):
            # lineIncorrect = 0;
            lineCorrect = 0
            lineWLMIncorrect = 0
            if stripTashkeel:
                result = araby.stripTashkeel(line)
            else:  #vocalize line by line
                if compare:
                    vocalizedLine = line
                    line = araby.stripTashkeel(line)
                result = vocalizer.tashkeel(line)
                #compare resultLine and vocalizedLine
                if compare:
                    list1 = vocalizer.analyzer.tokenize(vocalizedLine)
                    list2 = vocalizer.analyzer.tokenize(result)
                    #print u":".join(list1).encode('utf8');
                    #print u":".join(list2).encode('utf8');
                    total += len(list1)
                    lineTotal = len(list1)
                    if len(list1) != len(list2):
                        print "lists haven't the same length"
                    else:
                        for i in range(len(list1)):
                            simi = araby.vocalizedSimilarity(
                                list1[i], list2[i])
                            if simi < 0:
                                LettersError += -simi
                                incorrect += 1
                                # lineIncorrect += 1;
                                # evaluation without last haraka
                                simi2 = araby.vocalizedSimilarity(
                                    araby.stripLastHaraka(list1[i]),
                                    araby.stripLastHaraka(list2[i]))
                                if simi2 < 0:
                                    WLMIncorrect += 1
                                    lineWLMIncorrect += 1

                            else:
                                correct += 1
                                lineCorrect += 1

            #compare resultLine and vocalizedLine
            if reducedTashkeel:
                result = araby.reduceTashkeel(result)
            # print result.encode('utf8');
            counter += 1

            #display stat for every line
            if compare:
                print "%d\t%0.2f%%\t%0.2f%%\t%d\t%d\t%d\t%d\t" % (
                    counter - 1,  #id
                    round(correct * 100.00 / total, 2),  #fully Correct
                    round((total - WLMIncorrect) * 100.00 / total,
                          2),  #Strip Correct
                    incorrect,  #fully WER
                    WLMIncorrect,  #Strip WER
                    LettersError,  #LER
                    total,  #Total
                ),
                if lineTotal:
                    print "%0.2f%%\t" % round(lineCorrect * 100.00 / lineTotal,
                                              2),  #line Fully correct
                    print "%0.2f%%\t" % round(
                        (lineTotal - lineWLMIncorrect) * 100.00 / lineTotal,
                        2),  #line Strip correct

            print result.encode('utf8')
        #get the next line
        if not text:
            line = (myfile.readline()).decode('utf8')
        else:
            if counter < len(lines):
                line = lines[counter]
            else:
                line = None