def highlightAssignment(fingerprints, output):
    """Write one HTML table cell showing files with their fingerprints highlighted.

    The cell starts with a "Files by <author>" header taken from the first
    fingerprint.  Then, for the file named by the first unfinished
    fingerprint, the file text is written inside a ``<pre>`` element and the
    span of each fingerprint (``sline``/``scol`` to ``eline``/``ecol``) is
    wrapped in a yellow ``<SPAN>``.  Text is only written while fingerprints
    remain and the current line is fewer than five lines before the pending
    fingerprint's start line or fewer than five lines after the previous
    fingerprint's end line.

    Every emitted character is HTML-escaped so that ``<``, ``>``, ``&`` and
    quotes in the source text are displayed literally instead of being
    parsed as markup.

    Args:
        fingerprints: non-empty sequence of objects exposing ``auth``,
            ``loc``, ``sline``, ``scol``, ``eline`` and ``ecol``.
            NOTE(review): presumably ordered by file and then by position;
            confirm against the caller.
        output: object with a ``write(str)`` method that receives the HTML.

    Raises:
        IndexError: if ``fingerprints`` is empty (the header needs the
            first fingerprint's author).
    """
    # Local import so the block does not depend on the file's import list.
    import html

    index = 0
    output.write('<td>')
    output.write('<p> Files by ' + fingerprints[0].auth + ':</p>\n')
    # HTML fragments that open and close a highlighted region.
    startHighlight = '<SPAN style="BACKGROUND-COLOR: #ffff00">'
    endHighlight = '</SPAN>'
    # Each pass renders the file of the first fingerprint not yet finished.
    while index < len(fingerprints):
        firstPending = index
        filename = fingerprints[index].loc
        data = normalizeFileLines(filename).decode()
        output.write('<p>' + filename + ':</p>\n')
        line = 0
        col = 0
        output.write('<pre>\n')
        # Walk the text one character at a time, tracking line and column.
        for letter in data:
            # Open the highlight where the pending fingerprint starts.
            if (index < len(fingerprints)
                    and line == fingerprints[index].sline
                    and col == fingerprints[index].scol
                    and filename == fingerprints[index].loc):
                output.write(startHighlight)
            # The pending fingerprint ends here: advance to the next one.
            if (index < len(fingerprints)
                    and line == fingerprints[index].eline
                    and col == fingerprints[index].ecol
                    and filename == fingerprints[index].loc):
                index += 1
                # Keep the highlight open when the next fingerprint overlaps
                # the one that just ended; otherwise close it.
                if not (index < len(fingerprints)
                        and overlap(fingerprints[index - 1],
                                    fingerprints[index])):
                    output.write(endHighlight)
            # Only emit text near a fingerprint, and nothing once every
            # fingerprint has been consumed.
            if (index < len(fingerprints)
                    and (fingerprints[index].sline - line < 5
                         or (index > 0
                             and line - fingerprints[index - 1].eline < 5))):
                # Escape markup characters so file contents render literally.
                output.write(html.escape(letter, quote=True))
            col += 1
            # A newline starts the next line at column zero.
            if letter == '\n':
                line += 1
                col = 0
        output.write('</pre>')
        # If no fingerprint ended inside this file, re-rendering it would
        # repeat forever; skip the stuck fingerprint to guarantee progress.
        if index == firstPending:
            index += 1
    output.write('</td>')
def scrutinize(filelist, options):
    """Print, for each file, the other files that share many fingerprints.

    Every file in ``filelist`` is normalized, tokenized, cut into k-grams
    and winnowed into fingerprints.  For each file that produced
    fingerprints, the number of fingerprint-hash matches against every file
    is counted; files (other than itself) whose count exceeds 10% of the
    document's fingerprint count are printed beneath a header line.

    Args:
        filelist: iterable of file names to compare with each other.
        options: object providing ``language``, ``comments``, ``endlines``,
            ``whitespace``, ``text``, ``size`` and ``window``.

    Returns:
        None.  Results are written with ``print``.
    """
    # hash -> names of files containing it (one entry per occurrence)
    ownersByHash = collections.defaultdict(list)
    # file name -> fingerprints extracted from that file
    printsByDocument = collections.defaultdict(list)

    for filename in filelist:
        data = normalizeFileLines(filename)
        tokens = tokenize(
            options.language,
            data,
            options.comments,
            options.endlines,
            options.whitespace,
            options.text,
        )
        grams = kgrams(tokens, int(options.size))
        for fprint in winnowing(grams, int(options.window)):
            printsByDocument[filename].append(fprint)
            ownersByHash[fprint.hash].append(filename)

    for document, fprints in printsByDocument.items():
        # Count how often each file appears among this document's hashes.
        hits = collections.Counter(
            owner
            for fprint in fprints
            for owner in ownersByHash[fprint.hash]
        )
        minimum = len(fprints) * 0.1
        suspects = [
            (name, count)
            for name, count in sorted(hits.items())
            if count > minimum and name != document
        ]
        if suspects:
            print(document, ":", len(fprints))
            for name, count in suspects:
                print(' ', name, count)
def scrutinize(filelist, options):
    """Report pairs of files whose winnowed fingerprints overlap heavily.

    Fingerprints are computed for every file in ``filelist``.  For each
    file with at least one fingerprint, matches against all files are
    tallied by fingerprint hash, and any other file whose tally is greater
    than one tenth of the document's fingerprint count is printed.

    Args:
        filelist: iterable of file names to fingerprint and compare.
        options: settings object; ``language``, ``comments``, ``endlines``,
            ``whitespace`` and ``text`` are passed to ``tokenize``, while
            ``size`` and ``window`` are converted with ``int`` for
            ``kgrams`` and ``winnowing``.

    Returns:
        None.  The report goes to standard output via ``print``.
    """
    filesByHash = collections.defaultdict(list)
    printsByFile = collections.defaultdict(list)

    # Phase 1: fingerprint every file and index the hashes.
    for path in filelist:
        contents = normalizeFileLines(path)
        tokenStream = tokenize(options.language, contents, options.comments,
                               options.endlines, options.whitespace,
                               options.text)
        gramStream = kgrams(tokenStream, int(options.size))
        for fprint in winnowing(gramStream, int(options.window)):
            printsByFile[path].append(fprint)
            filesByHash[fprint.hash].append(path)

    # Phase 2: tally matches per document and print the heavy overlaps.
    for document, fprints in printsByFile.items():
        tally = collections.defaultdict(int)
        for fprint in fprints:
            for other in filesByHash[fprint.hash]:
                tally[other] += 1

        cutoff = len(fprints) * 0.1
        flagged = []
        for other, count in sorted(tally.items()):
            if other == document or not count > cutoff:
                continue
            flagged.append((other, count))

        if not flagged:
            continue
        print(document, ":", len(fprints))
        for other, count in flagged:
            print(' ', other, count)
def examine(filename, options):
    """Return the list of winnowed fingerprints for a single file.

    The file is normalized, tokenized according to ``options``, split into
    k-grams of ``int(options.size)`` and winnowed with a window of
    ``int(options.window)``.

    Args:
        filename: name of the file to fingerprint.
        options: object providing ``language``, ``comments``, ``endlines``,
            ``whitespace``, ``text``, ``size`` and ``window``.

    Returns:
        A new list holding every fingerprint yielded by ``winnowing``, in
        the order produced.
    """
    contents = normalizeFileLines(filename)
    tokens = tokenize(
        options.language,
        contents,
        options.comments,
        options.endlines,
        options.whitespace,
        options.text,
    )
    grams = kgrams(tokens, int(options.size))
    # Materialize the fingerprints so the caller receives a plain list.
    return list(winnowing(grams, int(options.window)))
def examine(filename, options):
    """Fingerprint one file and return its fingerprints as a list.

    Args:
        filename: name of the file handed to ``normalizeFileLines``.
        options: settings object; ``language``, ``comments``, ``endlines``,
            ``whitespace`` and ``text`` control tokenizing, ``size`` is the
            k-gram size and ``window`` the winnowing window (both are
            converted with ``int``).

    Returns:
        A list of the fingerprints produced by ``winnowing``, in order.
    """
    normalized = normalizeFileLines(filename)
    tokenStream = tokenize(options.language, normalized, options.comments,
                           options.endlines, options.whitespace, options.text)
    gramStream = kgrams(tokenStream, int(options.size))
    selected = winnowing(gramStream, int(options.window))
    # Collect every fingerprint into a fresh list for the caller.
    return [fprint for fprint in selected]
def highlightAssignment(fingerprints, output):
    """Highlight fingerprints in their source files and emit a table entry.

    Writes a ``<td>`` element containing a "Files by <author>" header (the
    author comes from the first fingerprint) followed by one ``<pre>``
    listing per rendering pass.  Within a listing, the characters from a
    fingerprint's ``sline``/``scol`` to its ``eline``/``ecol`` are wrapped
    in a yellow ``<SPAN>``.  A character is written only while fingerprints
    remain and its line is fewer than five lines before the pending
    fingerprint's start or fewer than five lines after the previous
    fingerprint's end.

    File text is passed through ``html.escape`` so ``<``, ``>``, ``&`` and
    quote characters appear literally in the page rather than acting as
    markup.

    Args:
        fingerprints: non-empty sequence of objects with ``auth``, ``loc``,
            ``sline``, ``scol``, ``eline`` and ``ecol`` attributes.
            NOTE(review): assumed to be grouped by file and sorted by
            position; confirm with the code that builds the list.
        output: writable object; only ``write(str)`` is used.

    Raises:
        IndexError: if ``fingerprints`` is empty.
    """
    # Imported here so this function is self-contained.
    import html

    index = 0
    output.write("<td>")
    output.write("<p> Files by " + fingerprints[0].auth + ":</p>\n")
    # Markup that starts and ends a highlighted region.
    startHighlight = '<SPAN style="BACKGROUND-COLOR: #ffff00">'
    endHighlight = "</SPAN>"
    # Loop while fingerprints remain; each pass renders one file.
    while index < len(fingerprints):
        indexAtPassStart = index
        filename = fingerprints[index].loc
        data = normalizeFileLines(filename).decode()
        output.write("<p>" + filename + ":</p>\n")
        line = 0
        col = 0
        output.write("<pre>\n")
        # Go through the data one character at a time.
        for letter in data:
            pending = fingerprints[index] if index < len(fingerprints) else None
            inThisFile = pending is not None and filename == pending.loc
            # Start of the pending fingerprint: begin highlighting.
            if inThisFile and line == pending.sline and col == pending.scol:
                output.write(startHighlight)
            # End of the pending fingerprint: move to the next one.
            if inThisFile and line == pending.eline and col == pending.ecol:
                index += 1
                # When the next fingerprint overlaps the finished one the
                # highlight stays open; otherwise it is closed here.
                continues = index < len(fingerprints) and overlap(
                    fingerprints[index - 1], fingerprints[index]
                )
                if not continues:
                    output.write(endHighlight)
            # Emit only text close to a fingerprint; nothing is written
            # after the last fingerprint has been consumed.
            if index < len(fingerprints) and (
                fingerprints[index].sline - line < 5
                or (index > 0 and line - fingerprints[index - 1].eline < 5)
            ):
                # Escape markup characters so the source is shown verbatim.
                output.write(html.escape(letter, quote=True))
            col += 1
            # A newline moves to the next line and resets the column.
            if letter == "\n":
                line += 1
                col = 0
        output.write("</pre>")
        # Without progress the same file would be rendered endlessly, so
        # skip a fingerprint whose end position was never reached.
        if index == indexAtPassStart:
            index += 1
    output.write("</td>")