Example #1
0
def highlightAssignment(fingerprints, output):
    """Write the fingerprinted files as one HTML table cell with highlights.

    Emits ``<td>``, a "Files by <author>" paragraph taken from the first
    fingerprint, and then one ``<pre>`` section per processed file in which
    each fingerprint's span is wrapped in a yellow-background ``<SPAN>``.
    Only text near a fingerprint is written: a character is emitted while
    fingerprints remain and either the pending fingerprint starts fewer than
    5 lines ahead of the current line, or the current line is fewer than
    5 lines past the previous fingerprint's end line.

    Args:
        fingerprints: Non-empty sequence of objects exposing ``auth``,
            ``loc`` (file name), ``sline``/``scol`` (zero-based start
            line/column) and ``eline``/``ecol`` (zero-based end line/column).
            They are consumed strictly in sequence, so they are presumably
            ordered by file and position -- confirm with the caller.
        output: Object with a ``write(str)`` method that receives the HTML.

    Raises:
        IndexError: If ``fingerprints`` is empty (``<td>`` has already been
            written at that point).
    """
    # Html code that starts and ends highlighting.
    startHighlight = '<SPAN style="BACKGROUND-COLOR: #ffff00">'
    endHighlight = '</SPAN>'
    # Characters that must be written as entities inside the <pre> section.
    htmlEscapes = {'<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;'}

    index = 0
    output.write('<td>')
    output.write('<p> Files by ' + fingerprints[0].auth + ':</p>\n')

    # While fingerprints remain.
    while index < len(fingerprints):
        passStart = index

        filename = fingerprints[index].loc
        # The helper's result is decoded, so it is presumably bytes.
        data = normalizeFileLines(filename).decode()  # Process the file.
        output.write('<p>' + filename + ':</p>\n')
        line = 0
        col = 0
        output.write('<pre>\n')
        # Go through data one letter at a time.
        for letter in data:

            # If it's the start of a fingerprint, commence highlighting.
            if (index < len(fingerprints)
                    and line == fingerprints[index].sline
                    and col == fingerprints[index].scol
                    and filename == fingerprints[index].loc):
                output.write(startHighlight)
            # If it's the end of highlighting for a fingerprint.
            if (index < len(fingerprints)
                    and line == fingerprints[index].eline
                    and col == fingerprints[index].ecol
                    and filename == fingerprints[index].loc):
                index += 1  # Move on to the next fingerprint.
                # If the fingerprints overlap, don't end highlighting.
                if not (index < len(fingerprints) and overlap(
                        fingerprints[index - 1], fingerprints[index])):
                    output.write(endHighlight)
            # Only emit text that lies in the context window of a fingerprint.
            if (index < len(fingerprints) and
                (fingerprints[index].sline - line < 5 or
                 (index > 0 and line - fingerprints[index - 1].eline < 5))):
                output.write(htmlEscapes.get(letter, letter))
            col += 1
            # If it's now a new line, adjust counters accordingly.
            if letter == '\n':
                line += 1
                col = 0

        output.write('</pre>')

        # A fingerprint whose end position never occurs in the file leaves
        # index unchanged, which would repeat this pass forever; skip it.
        if index == passStart:
            index += 1

    output.write('</td>')
Example #2
0
def scrutinize(filelist, options):
    """Print, for each file, the other files sharing many fingerprints.

    Every file in ``filelist`` is normalized, tokenized, split into k-grams
    and winnowed into fingerprints.  For each file that produced fingerprints,
    the function counts how often each file contributes a matching hash and
    prints those other files whose count exceeds 10% of the file's own
    fingerprint count.  Nothing is returned.

    Args:
        filelist: Iterable of file names to compare against one another.
        options: Object providing ``language``, ``comments``, ``endlines``,
            ``whitespace``, ``text``, ``size`` and ``window``; ``size`` and
            ``window`` are converted with ``int``.
    """
    files_by_hash = collections.defaultdict(list)
    prints_by_file = collections.defaultdict(list)

    # Pass 1: fingerprint every file and index the files by fingerprint hash.
    for filename in filelist:
        data = normalizeFileLines(filename)
        tokens = tokenize(options.language, data, options.comments,
                          options.endlines, options.whitespace, options.text)
        grams = kgrams(tokens, int(options.size))
        for fprint in winnowing(grams, int(options.window)):
            prints_by_file[filename].append(fprint)
            files_by_hash[fprint.hash].append(filename)

    # Pass 2: tally hash matches per file and report the significant ones.
    for document, fprints in prints_by_file.items():
        tally = collections.Counter()
        for fprint in fprints:
            tally.update(files_by_hash[fprint.hash])

        threshold = len(fprints) * 0.1
        suspects = [(other, count)
                    for other, count in sorted(tally.items())
                    if count > threshold and other != document]
        if not suspects:
            continue

        print(document, ":", len(fprints))
        for other, count in suspects:
            print('   ', other, count)
Example #3
0
def scrutinize(filelist, options):
    """Report files whose winnowed fingerprints overlap heavily.

    Each file named in ``filelist`` is run through ``normalizeFileLines``,
    ``tokenize``, ``kgrams`` and ``winnowing``.  Afterwards, for every file
    that yielded fingerprints, a header line and one line per other file are
    printed whenever that other file's number of hash matches is greater than
    a tenth of the file's own fingerprint count.  Returns ``None``.

    Args:
        filelist: Iterable of file names to fingerprint.
        options: Settings object read for ``language``, ``comments``,
            ``endlines``, ``whitespace``, ``text``, ``size`` and ``window``
            (the last two are passed through ``int``).
    """
    hash_owners = collections.defaultdict(list)
    file_prints = collections.defaultdict(list)

    for filename in filelist:
        data = normalizeFileLines(filename)
        tokens = tokenize(
            options.language,
            data,
            options.comments,
            options.endlines,
            options.whitespace,
            options.text,
        )
        for fprint in winnowing(kgrams(tokens, int(options.size)),
                                int(options.window)):
            file_prints[filename].append(fprint)
            hash_owners[fprint.hash].append(filename)

    for document, fprints in file_prints.items():
        # Number of hash matches contributed by each file (itself included).
        shared = {}
        for fprint in fprints:
            for owner in hash_owners[fprint.hash]:
                shared[owner] = shared.get(owner, 0) + 1

        cutoff = len(fprints) * 0.1
        report = []
        for owner in sorted(shared):
            count = shared[owner]
            if count > cutoff and owner != document:
                report.append((owner, count))

        if report:
            print(document, ":", len(fprints))
            for owner, count in report:
                print('   ', owner, count)
Example #4
0
def examine(filename, options):
    """Return the list of winnowed fingerprints for a single file.

    The file is normalized, tokenized according to ``options``, split into
    k-grams of ``int(options.size)`` and winnowed with a window of
    ``int(options.window)``.

    Args:
        filename: Name of the file to fingerprint.
        options: Object providing ``language``, ``comments``, ``endlines``,
            ``whitespace``, ``text``, ``size`` and ``window``.

    Returns:
        A new list holding every fingerprint yielded by ``winnowing``, in
        the order produced.
    """
    data = normalizeFileLines(filename)
    tokens = tokenize(options.language, data, options.comments,
                      options.endlines, options.whitespace, options.text)
    grams = kgrams(tokens, int(options.size))
    # Collect all the fingerprints into a list and return it.
    return list(winnowing(grams, int(options.window)))
Example #5
0
def examine(filename, options):
    """Fingerprint one file and return its fingerprints as a list.

    Pipeline: ``normalizeFileLines`` -> ``tokenize`` -> ``kgrams`` ->
    ``winnowing``; the k-gram size and window come from ``options.size`` and
    ``options.window`` (each converted with ``int``).

    Args:
        filename: Name of the file to process.
        options: Settings object read for ``language``, ``comments``,
            ``endlines``, ``whitespace``, ``text``, ``size`` and ``window``.

    Returns:
        List of the fingerprints produced by ``winnowing``, in order.
    """
    collected = []

    data = normalizeFileLines(filename)
    token_stream = tokenize(
        options.language,
        data,
        options.comments,
        options.endlines,
        options.whitespace,
        options.text,
    )
    selected = winnowing(kgrams(token_stream, int(options.size)),
                         int(options.window))
    # Add all the fingerprints to the list and return it.
    collected.extend(selected)
    return collected
Example #6
0
def highlightAssignment(fingerprints, output):
    """Render the fingerprinted files into an HTML ``<td>`` with highlights.

    Writes ``<td>``, a "Files by <author>" paragraph using the first
    fingerprint's ``auth``, and then a ``<pre>`` section per processed file.
    Inside each section the span of every fingerprint is wrapped in a
    yellow-background ``<SPAN>``.  Text is only written while fingerprints
    remain and the current line is close to one: the pending fingerprint
    starts fewer than 5 lines ahead, or the previous fingerprint ended fewer
    than 5 lines back.

    Args:
        fingerprints: Non-empty sequence of objects with ``auth``, ``loc``
            (file name), ``sline``/``scol`` and ``eline``/``ecol`` (zero-based
            start and end line/column).  Consumed in order; presumably sorted
            by file and position -- confirm with the caller.
        output: Object with a ``write(str)`` method that receives the HTML.

    Raises:
        IndexError: If ``fingerprints`` is empty (after ``<td>`` is written).
    """
    # Html code that starts and ends highlighting.
    startHighlight = '<SPAN style="BACKGROUND-COLOR: #ffff00">'
    endHighlight = "</SPAN>"
    # Entity replacements for characters that are special in HTML.
    htmlEscapes = {"<": "&lt;", ">": "&gt;", "&": "&amp;", '"': "&quot;"}

    index = 0
    output.write("<td>")
    output.write("<p> Files by " + fingerprints[0].auth + ":</p>\n")

    # While fingerprints remain.
    while index < len(fingerprints):
        passStart = index

        filename = fingerprints[index].loc
        # The helper's result is decoded, so it is presumably bytes.
        data = normalizeFileLines(filename).decode()  # Process the file.
        output.write("<p>" + filename + ":</p>\n")
        line = 0
        col = 0
        output.write("<pre>\n")
        # Go through data one letter at a time.
        for letter in data:

            # If it's the start of a fingerprint, commence highlighting.
            if (
                index < len(fingerprints)
                and line == fingerprints[index].sline
                and col == fingerprints[index].scol
                and filename == fingerprints[index].loc
            ):
                output.write(startHighlight)
            # If it's the end of highlighting for a fingerprint.
            if (
                index < len(fingerprints)
                and line == fingerprints[index].eline
                and col == fingerprints[index].ecol
                and filename == fingerprints[index].loc
            ):
                index += 1  # Move on to the next fingerprint.
                # If the fingerprints overlap, don't end highlighting.
                overlapsNext = index < len(fingerprints) and overlap(
                    fingerprints[index - 1], fingerprints[index]
                )
                if not overlapsNext:
                    output.write(endHighlight)
            # Only emit text that lies in the context window of a fingerprint.
            if index < len(fingerprints) and (
                fingerprints[index].sline - line < 5
                or (index > 0 and line - fingerprints[index - 1].eline < 5)
            ):
                output.write(htmlEscapes.get(letter, letter))
            col += 1
            # If it's now a new line, adjust counters accordingly.
            if letter == "\n":
                line += 1
                col = 0

        output.write("</pre>")

        # If no fingerprint was completed in this pass, the same file would
        # be processed again forever; skip the fingerprint that never ended.
        if index == passStart:
            index += 1

    output.write("</td>")