# --- Single-URL plagiarism pass: check each line against its top search result ---
linecount = 0  # running count of non-blank content lines
plagper = 0    # accumulated plagiarism percentage across all lines

for contentline in content:  # going through each line in the user data
    if contentline != "\n":  # skip blank lines
        linecount += 1
        # Best-matching source URL for this line (may be None when nothing is found).
        URLsToCheck = getURL1.URLFinder(contentline)
        somevar = 0  # similarity score for this line; stays 0 when no URL was found
        if URLsToCheck is not None:
            webSearch.searchResults(URLsToCheck)       # scrape text from the URL
            somevar = comparefuzzy.check(contentline)  # fuzzy-compare line vs scraped text
        # NOTE(review): threshold check reconstructed at this level (not nested inside
        # the URL branch) so that lines with no URL fall through to the else and are
        # still echoed to `highlight` — the somevar = 0 default only makes sense then.
        if somevar > 85:
            # 85% is the Levenshtein-distance threshold chosen from earlier observed
            # performance of the algorithm: count the line as fully plagiarised.
            plagper += 100
            matched.write("Line-" + str(linecount) + "::" + URLsToCheck + "\n")  # writing for matched sources
            # highlight.write("<font color=\"red\"><b>"+contentline+"</b></font>\n")  # writing for highlighting
            highlight.write(contentline.upper() + "Source::{" + URLsToCheck + "}\n")
        else:
            plagper += somevar
            highlight.write(contentline)  # writing non-highlighted
# NOTE: to decide whether the whole document is plagiarised, a separate threshold
# of 70% (or a user-supplied value) is applied to plagper elsewhere.
# --- Multi-URL plagiarism pass: try up to the first 3 search results per line,
# --- then fall back to a single-URL check with a lower threshold on a miss.
for contentline in content:  # going through each line in the user data
    if contentline != "\n":  # skip blank lines
        linecount += 1
        # First three candidate source URLs for this line (entries may be None).
        URLsToCheck = getURL3.URLFinder(contentline)
        maxval = 0     # best similarity score seen so far for this line
        maxurl = None  # URL that produced maxval (initialised to avoid NameError
                       # when no candidate scores above zero)
        for j in range(3):  # for all 3 urls
            if URLsToCheck[j] is not None:
                webSearch.searchResults(URLsToCheck[j])    # scrape candidate URL
                tempval = comparefuzzy.check(contentline)  # fuzzy-compare vs scraped text
                if tempval > maxval:
                    maxval = tempval
                    maxurl = URLsToCheck[j]
                if maxval > 85:
                    break  # confident match found; skip the remaining URLs
        if maxval > 85:
            # 85% Levenshtein-distance threshold (from earlier observed performance
            # of the algorithm): count the line as fully plagiarised.
            plagper += 100
            matched.write("Line-" + str(linecount) + "::" + maxurl + "\n")  # writing for matched sources
            # highlight.write("<font color=\"red\"><b>"+contentline+"</b></font>\n")  # writing for highlighting
            highlight.write(contentline.upper() + "Source::{" + maxurl + "}\n")
        else:
            # Fallback: retry with the single top URL from getURL and a lower 75%
            # threshold, to compensate for errors generated by common words and
            # sentences inflating similarity scores.
            somevar = 0  # fallback similarity score; stays 0 when no URL was found
            URLsToCheck = getURL.URLFinder(contentline)
            if URLsToCheck is not None:
                webSearch.searchResults(URLsToCheck)
                somevar = comparefuzzy.check(contentline)
            plagper += somevar
            if somevar > 75:
                matched.write("Line-" + str(linecount) + "::" + URLsToCheck + "\n")  # writing for matched sources
                highlight.write("<font color=\"red\"><b>" + contentline + "</b></font>\n")  # writing for highlighting
            else:
                highlight.write(contentline)  # writing non-highlighted