def main(file):
    """Report how much of ``file`` is already known, then index it.

    Builds the signature of ``file`` with ``sherlock.signature``, counts how
    many of its hashes are already stored in the JSON database ``db.json``,
    prints the totals and the resulting percentage, and finally stores every
    hash of this document in the database under a ``PlagReference`` to it.

    Args:
        file: The document to check; handed unchanged to
            ``sherlock.signature`` and ``PlagReference``.
    """
    jsondb = JsonPlagDB("db.json")

    # create signature
    print("Creating signatures")
    sig = sherlock.signature(file)
    print("Signatures created")

    # The database is keyed by the string form of each hash; convert once and
    # reuse the keys for both the lookup pass and the insert pass.
    keys = [str(_hash) for _hash in sig]
    total = len(keys)

    # check for equal signatures
    # All lookups finish before the first update below; presumably this keeps
    # the document's own hashes from being counted as matches.
    same_sig = sum(1 for key in keys if jsondb.lookup(key) is not None)

    # An empty signature would otherwise raise ZeroDivisionError; report 0%.
    percent = 100.0 / total * same_sig if total else 0.0

    print("number of signatures: " + str(total))
    print("number of equal signatures: " + str(same_sig))
    print("similarity: %s%%" % str(percent))

    # insert signatures of this document into db
    ref = PlagReference(file)
    for key in keys:
        jsondb.update(key, ref)
def main(file):
    """Print the similarity of ``file`` to indexed documents and index it.

    The signature returned by ``sherlock.signature(file)`` is compared
    against the JSON database ``db.json``: every hash found there counts as
    an equal signature. After the statistics are printed, all hashes of this
    document are written to the database with a ``PlagReference`` for
    ``file``.

    Args:
        file: The document to check; handed unchanged to
            ``sherlock.signature`` and ``PlagReference``.
    """
    jsondb = JsonPlagDB("db.json")

    # create signature
    print("Creating signatures")
    sig = sherlock.signature(file)
    print("Signatures created")

    # Database keys are the string form of the hashes; build them once so the
    # lookup and insert passes do not repeat the conversion.
    keys = [str(_hash) for _hash in sig]
    signature_count = len(keys)

    # check for equal signatures (done before inserting anything below)
    same_sig = 0
    for key in keys:
        if jsondb.lookup(key) is not None:
            same_sig += 1

    # Guard against an empty signature, which would raise ZeroDivisionError.
    if signature_count:
        percent = 100.0 / signature_count * same_sig
    else:
        percent = 0.0

    print("number of signatures: " + str(signature_count))
    print("number of equal signatures: " + str(same_sig))
    print("similarity: %s%%" % str(percent))

    # insert signatures of this document into db
    ref = PlagReference(file)
    for key in keys:
        jsondb.update(key, ref)
def main(testset_path):
    """Index the source documents of a test set and score the suspicious ones.

    Source files matching ``source_pattern + "*.txt"`` under ``testset_path``
    are signed with ``sherlock.signature`` and stored in the LMDB database at
    ``/tmp/plagdb.lmdb``. Each suspicious file (``suspicious_pattern``) is
    then signed, and the percentage of its hashes found in the database is
    printed.

    Args:
        testset_path: Directory containing the source and suspicious
            ``.txt`` files.
    """
    db = LmdbPlagDB("/tmp/plagdb.lmdb")

    source_list = glob.glob(testset_path + "/" + source_pattern + "*.txt")
    suspicious_list = glob.glob(testset_path + "/" + suspicious_pattern + "*.txt")

    # create signatures
    # NOTE(review): the counter is compared after being incremented, so the
    # loop stops once it reaches max_docs and only max_docs - 1 files are
    # processed (and a max_docs below 1 never stops it). Kept as-is; confirm
    # whether that limit is intended.
    for count, file in enumerate(source_list, start=1):
        if count == max_docs:
            break
        ref = PlagReference(file)
        print("Create signature for " + ref.filename)
        sig = sherlock.signature(file)
        # One batched write per document instead of one db.update per hash.
        db.update_batch(sig, ref)

    # check for equal signatures
    for count, file in enumerate(suspicious_list, start=1):
        if count == max_docs:
            break
        sig = sherlock.signature(file)
        total = len(sig)
        same_sig = sum(1 for _hash in sig if db.lookup(str(_hash)) is not None)
        # A file with an empty signature would otherwise abort the whole run
        # with ZeroDivisionError; report it as 0% similar instead.
        percent = 100.0 / total * same_sig if total else 0.0
        print("similarity: %s%%" % str(percent))
def main(testset_path):
    """Build the signature database from a test set and print similarities.

    First pass: every source file (``source_pattern + "*.txt"`` under
    ``testset_path``) is signed with ``sherlock.signature`` and written to
    the LMDB database at ``/tmp/plagdb.lmdb``. Second pass: every suspicious
    file (``suspicious_pattern + "*.txt"``) is signed and the share of its
    hashes already present in the database is printed as a percentage.

    Args:
        testset_path: Directory containing the source and suspicious
            ``.txt`` files.
    """
    db = LmdbPlagDB("/tmp/plagdb.lmdb")

    source_list = glob.glob(testset_path + "/" + source_pattern + "*.txt")
    suspicious_list = glob.glob(testset_path + "/" + suspicious_pattern + "*.txt")

    # create signatures
    # NOTE(review): the 1-based position is tested against max_docs before
    # the file is handled, so at most max_docs - 1 files are processed and a
    # max_docs below 1 imposes no limit. Behavior preserved; confirm intent.
    for position, file in enumerate(source_list, start=1):
        if position == max_docs:
            break
        ref = PlagReference(file)
        print("Create signature for " + ref.filename)
        sig = sherlock.signature(file)
        # Batched insert; replaces a per-hash db.update(str(hash), ref) loop.
        db.update_batch(sig, ref)

    # check for equal signatures
    for position, file in enumerate(suspicious_list, start=1):
        if position == max_docs:
            break
        sig = sherlock.signature(file)

        same_sig = 0
        for _hash in sig:
            if db.lookup(str(_hash)) is not None:
                same_sig += 1

        # Avoid ZeroDivisionError when a file yields no signature hashes.
        signature_count = len(sig)
        if signature_count:
            percent = 100.0 / signature_count * same_sig
        else:
            percent = 0.0
        print("similarity: %s%%" % str(percent))