Example no. 1
linecount = 0  # number of non-blank content lines processed so far
plagper = 0  # running plagiarism-percentage accumulator (one score per line)

# Walk the user's document line by line: for each non-blank line, find a
# candidate source URL, scrape it, and fuzzy-compare the line against the
# scraped text to accumulate a plagiarism score.
for contentline in content:

    if contentline != "\n":  # skip blank separator lines
        linecount += 1  # count only content lines

        # Candidate source URL for this line (None when nothing is found).
        URLsToCheck = getURL1.URLFinder(contentline)

        somevar = 0  # fuzzy-match score for this line, 0-100

        if URLsToCheck is not None:
            webSearch.searchResults(URLsToCheck)  # scrape text from the URL
            # Compare the line against the scraped data
            # (Levenshtein-distance based similarity).
            somevar = comparefuzzy.check(contentline)
            if somevar > 85:
                # 85% is the empirical threshold chosen for the Levenshtein
                # algorithm: above it the line counts as fully plagiarised.
                plagper += 100

                matched.write("Line-" + str(linecount) + "::" + URLsToCheck +
                              "\n")  # record the matched source
                highlight.write(contentline.upper() + "Source::{" +
                                URLsToCheck + "}\n")  # emphasise matched line
            else:
                plagper += somevar  # partial credit below the threshold

                highlight.write(contentline)  # write line unhighlighted
        else:
            # Bug fix: previously lines with no URL hit were dropped from the
            # highlight output entirely; write them through unhighlighted so
            # the highlighted document stays complete (matches Example 3).
            highlight.write(contentline)

#NOTE: to decide whether a whole document is plagiarised, a document-level
# threshold of 70% (or a user-supplied value) is applied elsewhere.
Example no. 2
# Walk the user's document line by line; for each non-blank line, try up to
# three candidate URLs and keep the one with the highest fuzzy-match score.
for contentline in content:  # going through each line in the user data

    if (contentline != "\n"):  # skip blank separator lines

        linecount += 1  # count of non-blank content lines

        URLsToCheck = [None] * 3  # placeholder; immediately overwritten below
        # presumably returns a list of up to 3 candidate URLs — TODO confirm
        URLsToCheck = getURL3.URLFinder(contentline)

        maxval = 0  # best fuzzy-match score seen so far for this line (0-100)

        for j in range(3):  # check each of the 3 candidate URLs
            if (URLsToCheck[j] != None):
                webSearch.searchResults(URLsToCheck[j])  # scrape the URL
                # Compare the line against the freshly scraped text.
                tempval = comparefuzzy.check(contentline)
                if (tempval > maxval):
                    maxval = tempval  # remember the best score...
                    maxurl = URLsToCheck[j]  # ...and which URL produced it
                    tempval = 0  # NOTE(review): dead store — tempval is reassigned next iteration
                if (maxval > 85):
                    break  # good enough match found; skip remaining URLs

        if (maxval > 85):  # 85%: empirical threshold for Levenshtein distance
            plagper += 100  # treat the whole line as fully plagiarised

            matched.write("Line-" + str(linecount) + "::" + maxurl +
                          "\n")  # record line number and matched source
            # highlight.write("<font color=\"red\"><b>"+contentline+"</b></font>\n")	#writing for highlighting
            highlight.write(contentline.upper() + "Source::{" + maxurl + "}\n")
        else:
Example no. 3
        # Body of the per-line loop (the enclosing `for contentline in ...`
        # is outside this view): find one candidate URL, scrape it, and score
        # the line against the scraped text.
        # URLsToCheck = [None]*3	#For checking first 3 URLs
        URLsToCheck = None  # NOTE(review): dead store — overwritten on the next line
        URLsToCheck = getURL.URLFinder(contentline)  # candidate URL, or None

        # for j in range(3):
        # if(URLsToCheck[j]!=None):
        if (URLsToCheck != None):
            # print("Checking: "+URLsToCheck[j]+"\n")
            # webSearch.searchResults(URLsToCheck[j])
            webSearch.searchResults(URLsToCheck)  # scrape text from the URL
            # print("Scrapped Text from "+URLsToCheck[j]+"\n")
        # if(compareContent.check(contentline)):
        # 	# print("A Plag Found!!!...\n")
        # 	plagflag+=1
        # 	break
        # Fuzzy-compare the line against whatever was last scraped.
        # NOTE(review): runs even when no URL was found — presumably compares
        # against stale/empty data in that case; verify against comparefuzzy.
        somevar = comparefuzzy.check(contentline)

        # if(somevar>75):
        # 	plagper += 100	#kept a threshold value of 75 as per the performance of the algo seen before
        # else:
        plagper += somevar  # accumulate this line's raw similarity score

        if (
                somevar > 75
        ):  # 75% threshold compensates for errors from common words/sentences
            matched.write("Line-" + str(linecount) + "::" + URLsToCheck +
                          "\n")  # record line number and matched source
            highlight.write("<font color=\"red\"><b>" + contentline +
                            "</b></font>\n")  # highlight plagiarised line in red
        else:
            highlight.write(contentline)  # write line unhighlighted