def buildMaster(gathered):
    """Combine the fingerprints of every assignment into one dictionary.

    ``gathered`` is iterated as a sequence of mappings; each mapping's values
    are iterated as collections of entries. Every entry is handed to
    ``createOrAppend`` under its ``hash`` attribute (presumably appending to
    the list stored at that key -- confirm against the helper).

    Returns the combined dictionary.
    """
    combined = {}
    # Flatten assignment -> bucket -> entry lazily, keeping the same order.
    every_entry = (
        fingerprint
        for per_assignment in gathered
        for bucket in per_assignment.values()
        for fingerprint in bucket
    )
    for fingerprint in every_entry:
        createOrAppend(combined, fingerprint.hash, fingerprint)
    return combined
def compareAll(assignment, master, matches):
    """Calculate the intersection between one assignment and the master.

    For each hash key of ``assignment`` that is also present in ``master``,
    every master entry whose ``auth`` differs from the ``auth`` of the
    assignment's first entry for that hash is passed to ``createOrAppend``
    with ``matches`` and the entry's author as key.

    Nothing is returned; ``matches`` is handed to ``createOrAppend``.
    """
    for fingerprint_hash in assignment:
        if fingerprint_hash in master:
            # Fingerprints in the master that share this hash.
            for candidate in master[fingerprint_hash]:
                # Only a different author counts as a match.
                if candidate.auth != assignment[fingerprint_hash][0].auth:
                    createOrAppend(matches, candidate.auth, candidate)
def vsDB(assignment, matches, db_path, lang):
    """Compare an assignment's fingerprints against the SQLite database.

    For every hash key in ``assignment`` the database at ``db_path`` is
    queried for rows with the same hash whose author differs from the author
    of the assignment's first entry for that hash. Each row found is wrapped
    in an ``Entry`` and handed to ``createOrAppend`` with ``matches`` and the
    row's author as key.

    Parameters:
        assignment: mapping of hash -> sequence of entries; only the first
            entry's ``auth`` attribute is read.
        matches: container passed through to ``createOrAppend``.
        db_path: path given to ``sqlite3.connect``.
        lang: value passed to ``getTableNames`` to obtain the two table
            names used in the query.

    Returns None.
    """
    import sqlite3

    conn = sqlite3.connect(db_path)
    try:
        # Rows are read by column name below, which needs sqlite3.Row;
        # the default tuple rows only support integer indexes.
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()

        lang, auth = getTableNames(lang)
        # Table names cannot be bound as "?" parameters, so they are spliced
        # into the statement; the per-row values are still bound.
        # Note the leading space before "from": without it the statement
        # reads "... pathfrom ..." and fails to parse.
        query_string = (
            "select hash, sline, scol, eline, ecol, auth, path"
            + " from " + lang + ", " + auth
            + " where auth<>? and hash=?"
            + " and " + lang + ".fileid=" + auth + ".rowid"
        )

        for key in assignment:
            target = (assignment[key][0].auth, key)
            for row in cur.execute(query_string, target):
                # Presumably Entry accepts the seven selected columns
                # positionally -- confirm against its definition.
                createOrAppend(matches, row["auth"], Entry(*row))
    finally:
        # Release the database handle even if a query or helper raises.
        conn.close()
def processFolder(path, merge, author, options):
    """Add the fingerprints of all files found under a folder to ``merge``.

    Runs recursively on every sub-folder. Each file is passed to ``examine``
    and every tuple it yields is turned into an ``Entry`` (tagged with
    ``author`` and the file's absolute path) and handed to
    ``createOrAppend`` under the tuple's ``hash``.

    Parameters:
        path: folder to scan; may be relative to the current directory.
        merge: dictionary the fingerprints are added to.
        author: value stored as ``auth`` on every created entry.
        options: passed through unchanged to ``examine``.

    Returns None; results are delivered through ``merge``. The working
    directory is changed while scanning and restored before returning, even
    when an error occurs.
    """
    # Path juggling: file names from listdir are used relative to the folder.
    current = os.getcwd()
    os.chdir(path)
    try:
        for name in os.listdir("."):
            if os.path.isdir(name):
                # If folder, recurse.
                processFolder(name, merge, author, options)
                continue

            fingerprints = examine(name, options)
            # Resolve the path once from the file name. The name must not be
            # rebound inside the loop, otherwise later fingerprints of the
            # same file would resolve the path of an Entry object instead.
            full_path = os.path.abspath(name)
            for tup in fingerprints:
                # Add to dictionary.
                record = Entry(*tup, auth=author, path=full_path)
                createOrAppend(merge, tup.hash, record)
    finally:
        os.chdir(current)