def compare_with_programs(self, count, file, dict):
    '''
    Compare ``file`` with every file in ``dict`` and log how similar they are.

    Dictionary ``dict`` contains all files submitted by a single student;
    iterating over it must yield file paths. Both texts are normalised with
    the ``fix_text`` helper of the class selected by ``self.lang`` and the
    normalised texts are compared with ``difflib.SequenceMatcher``.

    Pairs in which either normalised text has fewer than 3 words are ignored.
    For every other pair one line is written to ``self.log_file`` and one
    entry is appended to ``self.log_list``. Pairs whose score reaches 0.53
    are additionally printed and written to ``self.log_file_hig``; pairs
    above the "copied" threshold (0.62 when either text has more than 200
    words, 0.59 otherwise) go to ``self.log_file_exa`` instead.

    count : unused by this method; kept for interface compatibility.
    file  : path of the file to compare.
    dict  : iterable of paths to compare ``file`` against.

    Returns None. An unsupported ``self.lang`` is reported once and the
    method returns without comparing anything. Empty files in ``dict`` are
    skipped.
    '''
    with open(file, 'r') as f1:
        textA = f1.read()

    # Cubic calibration curve used to damp the score of short files. It does
    # not depend on the pair being compared, so it is fitted at most once.
    poly_fit = None

    for i in dict:
        with open(i, 'r') as f2:
            textB = f2.read()

        # An empty file has nothing to match and previously caused a
        # ZeroDivisionError in the size check.
        if not textB:
            continue

        # NOTE(review): the original size pre-filter
        # `len(A)/len(B) > 0.2 or len(A)/len(B) < 50` is true for every
        # non-empty pair, so it never filtered anything. Every pair is still
        # compared here; use `and` if a real size filter is wanted.

        if self.lang == 'vhdl':
            vhdl = VHDL()
            # VHDL.fix_text also returns a word count, which is not used.
            text1, line1, _ = vhdl.fix_text(textA, self.lang)
            text2, line2, _ = vhdl.fix_text(textB, self.lang)
        elif self.lang == 'verilog':
            verilog = Verilog()
            text1, line1 = verilog.fix_text(textA, self.lang)
            text2, line2 = verilog.fix_text(textB, self.lang)
        elif self.lang == 'ctype':
            ctype = Ctype()
            text1, line1 = ctype.fix_text(textA, self.lang)
            text2, line2 = ctype.fix_text(textB, self.lang)
        elif self.lang == 'pdf':
            pdf = Pdf()
            # The PDF helper is given the paths, not the raw text.
            text1, line1 = pdf.fix_text(file, self.lang)
            text2, line2 = pdf.fix_text(i, self.lang)
        else:
            # self.lang does not change between pairs, so stop here instead
            # of failing later on the undefined text1/text2.
            print("This language is not supported.")
            return

        print(" ++ Comparing {0}:{2} <-> {1} : {3}".format(
            f1.name.split('/').pop(), f2.name.split('/').pop(),
            len(textA), len(textB)))

        s = difflib.SequenceMatcher(None, text1, text2)
        lst = s.get_matching_blocks()
        # matched: total length of all matching blocks.
        # w: the same total weighted by the number of blocks.
        matched = sum(block[2] for block in lst)
        w = len(lst) * matched

        words1 = len(text1.split())
        words2 = len(text2.split())
        if words1 < 3 or words2 < 3:
            # Too little text for the score to mean anything.
            continue

        f_ratio = float(words1) / float(words2)

        if words1 > 200 or words2 > 200:
            ratio = s.ratio()
            copied_at = 0.62
        else:
            # Handle small files: scale the raw score by the calibration
            # curve evaluated at the smaller line count.
            if poly_fit is None:
                poly_fit = numpy.polyfit(
                    [30, 50, 100, 150, 200, 250, 300],
                    [0.7, 0.81, 0.85, 0.88, 0.89, 0.95, 0.99],
                    3)
            ratio = s.ratio() * numpy.polyval(poly_fit, min(line1, line2))
            copied_at = 0.59

        log = '{0}, {1}, {2}, {3}, {4}, {5} \n'.format(
            f_ratio, w, matched, ratio, f1.name, f2.name)
        self.log_file.write(log)
        self.log_list.append([f1.name, f2.name, ratio, f_ratio, w, matched])

        # The tiers are mutually exclusive: a score exactly on the upper
        # threshold is reported once, as a "check manually" match.
        if ratio > copied_at:
            print(' *NOTICE : These files are copied!')
            print(' |- {1}\n |- {2}\n ++MATCH INDEX: {0} \n'
                  .format(ratio, f1.name, f2.name))
            self.log_file_exa.write(log)
        elif ratio >= 0.53:
            print(' *These two files matches significantly. Check manually.')
            print(' |- {1}\n |- {2}\n ++MATCH INDEX: {0} \n'
                  .format(ratio, f1.name, f2.name))
            self.log_file_hig.write(log)
def compare_with_programs(self, file, dict, userA, count):
    '''
    Compare ``file`` with every file in ``dict`` and record the result in
    the ``match`` table.

    Dictionary ``dict`` contains all files submitted by a single student;
    iterating over it must yield file paths. Both texts are normalised with
    the ``fix_text`` helper of the class selected by ``self.lang`` and the
    normalised texts are compared with ``difflib.SequenceMatcher``.

    For each compared pair a row holding the raw match ratio is written
    through ``self.c`` (REPLACE INTO match). Unless either normalised text
    has fewer than 3 words, the row's ``severity`` is then set from the
    score: "noise", "mild", "high", "veryhigh" or "identical". When both
    texts have at most 200 words the raw ratio is first scaled by a
    calibration curve, and the "identical" threshold is 0.59 instead of
    0.62. Pending changes are committed on ``self.db`` once, after all pairs
    have been processed.

    file  : path of the file to compare.
    dict  : iterable of paths to compare ``file`` against.
    userA : value stored in the ``userA`` column of each row.
    count : progress counter, printed next to ``len(self.file_dict)``.

    Returns None. An unsupported ``self.lang`` is reported once and the
    method returns without comparing anything. Empty files in ``dict`` are
    skipped.
    '''
    # sqlite queries
    query = '''REPLACE INTO match (userA, fileA, sizeA, fileB, sizeB, match)
        VALUES (?, ?, ?, ?, ?, ?)'''
    queryRation = '''UPDATE match SET severity=? WHERE fileA=? AND fileB=?'''

    with open(file, 'r') as f1:
        textA = f1.read()

    # Cubic calibration curve used to damp the score of short files. It does
    # not depend on the pair being compared, so it is fitted at most once.
    poly_fit = None

    for i in dict:
        with open(i, 'r') as f2:
            textB = f2.read()

        # An empty file has nothing to match and previously caused a
        # ZeroDivisionError in the size check.
        if not textB:
            continue

        # NOTE(review): the original size pre-filter
        # `len(A)/len(B) > 0.2 or len(A)/len(B) < 5` is true for every
        # non-empty pair, so it never filtered anything. Every pair is still
        # compared here; use `and` if a real size filter is wanted.

        if self.lang == 'vhdl':
            vhdl = VHDL()
            text1, line1 = vhdl.fix_text(textA)
            text2, line2 = vhdl.fix_text(textB)
        elif self.lang == 'verilog':
            verilog = Verilog()
            text1, line1 = verilog.fix_text(textA)
            text2, line2 = verilog.fix_text(textB)
        elif self.lang == 'ctype':
            ctype = Ctype()
            text1, line1 = ctype.fix_text(textA)
            text2, line2 = ctype.fix_text(textB)
        elif self.lang == 'pdf':
            pdf = Pdf()
            # The PDF helper is given the paths, not the raw text.
            text1, line1 = pdf.fix_text(file)
            text2, line2 = pdf.fix_text(i)
        else:
            # self.lang does not change between pairs, so stop here instead
            # of failing later on the undefined text1/text2.
            print("This language is not supported.")
            return

        print("+{4}/{5} Comparing {0}:{2} <-> {1} : {3}".format(
            f1.name.split('/').pop(), f2.name.split('/').pop(),
            len(textA), len(textB), count, len(self.file_dict)))

        s = difflib.SequenceMatcher(None, text1, text2)
        raw_ratio = s.ratio()
        self.c.execute(query, (userA, f1.name, len(textA),
                               f2.name, len(textB), raw_ratio))

        words1 = len(text1.split())
        words2 = len(text2.split())
        if words1 < 3 or words2 < 3:
            # Too little text to classify; the row keeps no severity.
            continue

        if words1 > 200 or words2 > 200:
            ratio = raw_ratio
            copied_at = 0.62
        else:
            # Handle small files: scale the raw score by the calibration
            # curve evaluated at the smaller line count.
            if poly_fit is None:
                poly_fit = numpy.polyfit(
                    [30, 50, 100, 150, 200, 250, 300],
                    [0.7, 0.81, 0.85, 0.88, 0.89, 0.95, 0.99],
                    3)
            ratio = raw_ratio * numpy.polyval(poly_fit,
                                              int(min(line1, line2)))
            copied_at = 0.59

        if ratio > 0.27 and ratio < 0.42:
            severity = "mild"
            headline = ' Mild copying is possible in following files'
        elif ratio >= 0.42 and ratio < 0.53:
            severity = "high"
            headline = ' Significant copying possible in files'
        elif ratio >= 0.53 and ratio <= copied_at:
            severity = "veryhigh"
            headline = (' *These two files matches significantly.'
                        ' Check manually.')
        elif ratio > copied_at:
            severity = "identical"
            headline = ' *NOTICE : These files are copied!'
        else:
            # Scores of 0.27 or less are noise for small files as well; the
            # small-file branch used to store "identical" here.
            severity = "noise"
            headline = None

        if headline is not None:
            print(headline)
            print(' |- {1}\n |- {2}\n ++MATCH INDEX: {0} \n'
                  .format(ratio, f1.name, f2.name))
        self.c.execute(queryRation, (severity, f1.name, f2.name))

    self.db.commit()