Ejemplo n.º 1
0
    def compare_with_programs(self, count,  file, dict):
        '''
        Compare 'file' with every file in 'dict' and log the similar pairs.

        Dictionary dict contains all files submitted by a single student. File
        'file' is compared with this dictionary.

        Both files of a pair are passed through the fix_text() method of the
        handler selected by self.lang and scored with difflib.SequenceMatcher.
        Pairs in which either processed text has fewer than 3 words are
        ignored. When either text has more than 200 words the raw ratio is
        used; otherwise the ratio is scaled by a cubic fitted through fixed
        calibration points. Every scored pair is written to self.log_file and
        appended to self.log_list; strong matches are additionally written to
        self.log_file_hig or self.log_file_exa.

        count -- not used by this method.
        file  -- path of the file under test.
        dict  -- iterable of paths to compare 'file' against.

        Returns None.
        '''
        lang = self.lang
        # An if/elif chain (rather than a lookup table) so that only the
        # handler that is actually needed has to be resolvable.
        if lang == 'vhdl':
            handler_cls = VHDL
        elif lang == 'verilog':
            handler_cls = Verilog
        elif lang == 'ctype':
            handler_cls = Ctype
        elif lang == 'pdf':
            handler_cls = Pdf
        else:
            handler_cls = None

        # Loop-invariant: cubic through fixed calibration points, used to
        # scale the ratio of small files.
        poly_fit = numpy.polyfit([30, 50, 100, 150, 200, 250, 300],
                                 [0.7, 0.81, 0.85, 0.88, 0.89, 0.95, 0.99], 3)

        with open(file, 'r') as f1:
            textA = f1.read()
            nameA = f1.name

        for i in dict:
            if handler_cls is None:
                # Previously this fell through and crashed with a NameError
                # on the never-assigned text1/text2.
                print("This language is not supported.")
                return

            with open(i, 'r') as f2:
                textB = f2.read()
                nameB = f2.name

            if not textB:
                # An empty file used to raise ZeroDivisionError in the size
                # check; there is nothing to compare against, so skip it.
                continue

            # NOTE(review): the old size check
            # `len(A)/len(B) > 0.2 or len(A)/len(B) < 50` was true for every
            # non-empty pair, so it never filtered anything. It is dropped
            # here to keep the set of compared pairs unchanged; if a size
            # filter was intended it needs `and` and agreed bounds.

            handler = handler_cls()
            if lang == 'pdf':
                # The pdf handler is given the paths, not the raw contents.
                fixedA = handler.fix_text(file, lang)
                fixedB = handler.fix_text(i, lang)
            else:
                fixedA = handler.fix_text(textA, lang)
                fixedB = handler.fix_text(textB, lang)
            # The vhdl handler returns a third value that is not used here.
            text1, line1 = fixedA[0], fixedA[1]
            text2, line2 = fixedB[0], fixedB[1]

            print(" ++ Comparing {0}:{2} <-> {1} : {3}".format(
                nameA.split('/').pop(), nameB.split('/').pop(),
                len(textA), len(textB)))

            s = difflib.SequenceMatcher(None, text1, text2)
            lst = s.get_matching_blocks()
            # w is the total matched length multiplied by the number of
            # matching blocks (lst always holds at least the terminator).
            w = len(lst) * sum(block[2] for block in lst)
            w_per_block = w // len(lst)

            words1 = len(text1.split())
            words2 = len(text2.split())

            # Texts with fewer than 3 words are not worth scoring.
            if words1 < 3 or words2 < 3:
                continue

            if words1 > 200 or words2 > 200:
                ratio = s.ratio()
                copied_at = 0.62
            else:
                # Handle small files: scale the ratio by the fitted cubic
                # evaluated at the smaller second value from fix_text()
                # (presumably a line count -- confirm against the handlers).
                ratio = s.ratio() * numpy.polyval(poly_fit, min(line1, line2))
                copied_at = 0.59

            f_ratio = float(words1) / float(words2)
            log = '{0}, {1}, {2}, {3}, {4}, {5} \n'.format(
                f_ratio, w, w_per_block, ratio, nameA, nameB)
            self.log_file.write(log)
            self.log_list.append([nameA, nameB, ratio, f_ratio, w,
                                  w_per_block])

            details = '   |- {1}\n   |- {2}\n   ++MATCH INDEX: {0} \n'.format(
                ratio, nameA, nameB)

            # The two checks are independent, as before: a ratio exactly
            # equal to copied_at is reported in both logs.
            if ratio >= 0.53 and ratio <= copied_at:
                print('   *These two files matches significantly. Check manually.')
                print(details)
                self.log_file_hig.write(log)

            if ratio >= copied_at:
                print('   *NOTICE : These files are copied!')
                print(details)
                self.log_file_exa.write(log)
Ejemplo n.º 2
0
    def compare_with_programs(self,  file, dict, userA, count):
        '''
        Compare 'file' with every file in 'dict' and store the results.

        Dictionary dict contains all files submitted by a single student. File
        'file' is compared with this dictionary.

        Both files of a pair are passed through the fix_text() method of the
        handler selected by self.lang and scored with difflib.SequenceMatcher.
        The raw ratio of every compared pair is stored in the 'match' table
        through the cursor self.c. Unless either processed text has fewer
        than 3 words, the row's severity is then set to one of "noise",
        "mild", "high", "veryhigh" or "identical". When either text has more
        than 200 words the raw ratio is classified; otherwise the ratio is
        first scaled by a cubic fitted through fixed calibration points.
        self.db.commit() is called once after all pairs are processed.

        file  -- path of the file under test.
        dict  -- iterable of paths to compare 'file' against.
        userA -- value stored in the userA column of each row.
        count -- progress counter, only shown in the console output.

        Returns None.
        '''
        lang = self.lang
        # An if/elif chain (rather than a lookup table) so that only the
        # handler that is actually needed has to be resolvable.
        if lang == 'vhdl':
            handler_cls = VHDL
        elif lang == 'verilog':
            handler_cls = Verilog
        elif lang == 'ctype':
            handler_cls = Ctype
        elif lang == 'pdf':
            handler_cls = Pdf
        else:
            handler_cls = None

        # sqlite queries
        query = '''REPLACE INTO match
                    (userA, fileA, sizeA, fileB, sizeB, match)
                    VALUES (?, ?, ?, ?, ?, ?)'''
        queryRation = '''UPDATE match SET severity=? WHERE fileA=?
                    AND fileB=?'''

        # Console headline printed for each reportable severity.
        headlines = {
            "mild": '   Mild copying is possible in following files',
            "high": '   Significant copying possible in files',
            "veryhigh": '   *These two files matches significantly. Check manually.',
            "identical": '   *NOTICE : These files are copied!',
        }

        # Loop-invariant: cubic through fixed calibration points, used to
        # scale the ratio of small files.
        poly_fit = numpy.polyfit([30, 50, 100, 150, 200, 250, 300],
                                 [0.7, 0.81, 0.85, 0.88, 0.89, 0.95, 0.99], 3)

        with open(file, 'r') as f1:
            textA = f1.read()
            nameA = f1.name

        for i in dict:
            if handler_cls is None:
                # Previously this fell through and crashed with a NameError
                # on the never-assigned text1/text2.
                print("This language is not supported.")
                break

            with open(i, 'r') as f2:
                textB = f2.read()
                nameB = f2.name

            if not textB:
                # An empty file used to raise ZeroDivisionError in the size
                # check; there is nothing to compare against, so skip it.
                continue

            # NOTE(review): the old size check
            # `len(A)/len(B) > 0.2 or len(A)/len(B) < 5` was true for every
            # non-empty pair, so it never filtered anything. It is dropped
            # here to keep the set of compared pairs unchanged; if a size
            # filter was intended it needs `and` and agreed bounds.

            handler = handler_cls()
            if lang == 'pdf':
                # The pdf handler is given the paths, not the raw contents.
                text1, line1 = handler.fix_text(file)
                text2, line2 = handler.fix_text(i)
            else:
                text1, line1 = handler.fix_text(textA)
                text2, line2 = handler.fix_text(textB)

            print("+{4}/{5} Comparing {0}:{2} <-> {1} : {3}".format(
                nameA.split('/').pop(), nameB.split('/').pop(),
                len(textA), len(textB), count, len(self.file_dict)))

            s = difflib.SequenceMatcher(None, text1, text2)
            similarity = s.ratio()

            # The raw ratio is always recorded, even for pairs that are too
            # short to be classified below.
            self.c.execute(query, (userA, nameA, len(textA),
                                   nameB, len(textB), similarity))

            words1 = len(text1.split())
            words2 = len(text2.split())

            # Texts with fewer than 3 words get no severity.
            if words1 < 3 or words2 < 3:
                continue

            if words1 > 200 or words2 > 200:
                ratio = similarity
                copied_at = 0.62
            else:
                # Handle small files: scale the ratio by the fitted cubic
                # evaluated at the smaller second value from fix_text()
                # (presumably a line count -- confirm against the handlers).
                ratio = similarity * numpy.polyval(
                    poly_fit, int(min(line1, line2)))
                copied_at = 0.59

            if ratio > 0.27 and ratio < 0.42:
                severity = "mild"
            elif ratio >= 0.42 and ratio < 0.53:
                severity = "high"
            elif ratio >= 0.53 and ratio <= copied_at:
                severity = "veryhigh"
            elif ratio > copied_at:
                severity = "identical"
            else:
                # Below every threshold. The small-file branch used to store
                # "identical" here, unlike the large-file branch.
                severity = "noise"

            if severity in headlines:
                print(headlines[severity])
                print('   |- {1}\n   |- {2}\n   ++MATCH INDEX: {0} \n'.format(
                    ratio, nameA, nameB))

            self.c.execute(queryRation, (severity, nameA, nameB))

        self.db.commit()