コード例 #1
0
ファイル: single_test.py プロジェクト: flozzone/plagcheck
def main(file):
    """Report how much of *file* is already known to the database, then store it.

    Builds the signature of ``file`` with ``sherlock.signature``, counts how
    many of its hashes are already present in the JSON database ``db.json``,
    prints the totals and the resulting percentage, and finally records every
    hash of this document in the database.

    Args:
        file: Passed unchanged to ``sherlock.signature`` and
            ``PlagReference`` (presumably the path of the document to
            check -- confirm against the caller).
    """
    jsondb = JsonPlagDB("db.json")

    # create signature
    print("Creating signatures")
    sig = sherlock.signature(file)
    print("Signatures created")

    # The database is addressed by the string form of each hash; convert once
    # and reuse the keys for both the lookup pass and the insert pass.
    keys = [str(_hash) for _hash in sig]

    # check for equal signatures
    # All lookups happen before any insert: the insert pass below adds these
    # very same keys to the database.
    same_sig = sum(1 for key in keys if jsondb.lookup(key) is not None)

    # An empty signature used to raise ZeroDivisionError here; report 0%
    # similarity instead.
    total = len(sig)
    percent = 100.0 / total * same_sig if total else 0.0

    print("number of signatures: " + str(total))
    print("number of equal signatures: " + str(same_sig))
    print("similarity: %s%%" % str(percent))

    # insert signatures of this document into db
    ref = PlagReference(file)
    for key in keys:
        jsondb.update(key, ref)
コード例 #2
0
ファイル: single_test.py プロジェクト: flozzone/plagcheck
def main(file):
    """Print the similarity of *file* to stored documents and register it.

    The signature returned by ``sherlock.signature(file)`` is checked hash by
    hash against the JSON database ``db.json``. The number of hashes, the
    number of hashes already stored, and the percentage of stored hashes are
    printed. Afterwards every hash of the document is written to the database
    together with a ``PlagReference`` for ``file``.

    Args:
        file: Forwarded as-is to ``sherlock.signature`` and
            ``PlagReference`` (presumably a document path -- confirm against
            the caller).
    """
    jsondb = JsonPlagDB("db.json")

    # create signature
    print("Creating signatures")
    sig = sherlock.signature(file)
    print("Signatures created")

    # Lookups and updates both use the string form of a hash, so build the
    # keys a single time instead of converting in two separate loops.
    hash_keys = [str(_hash) for _hash in sig]

    # check for equal signatures
    # This must finish before the update loop further down, which stores
    # exactly these keys in the database.
    same_sig = sum(1 for key in hash_keys if jsondb.lookup(key) is not None)

    # Guard against an empty signature, which previously crashed with
    # ZeroDivisionError; nothing matched, so the similarity is 0%.
    sig_count = len(sig)
    if sig_count:
        percent = 100.0 / sig_count * same_sig
    else:
        percent = 0.0

    print("number of signatures: " + str(sig_count))
    print("number of equal signatures: " + str(same_sig))
    print("similarity: %s%%" % str(percent))

    # insert signatures of this document into db
    ref = PlagReference(file)
    for key in hash_keys:
        jsondb.update(key, ref)
コード例 #3
0
ファイル: testsuite.py プロジェクト: flozzone/plagcheck
def main(testset_path):
    """Index the source documents of a test set and score the suspicious ones.

    Files are discovered with ``glob`` under ``testset_path`` using the
    module-level ``source_pattern`` and ``suspicious_pattern`` prefixes and a
    ``*.txt`` suffix. Each source file's signature is stored in the LMDB
    database at ``/tmp/plagdb.lmdb``; afterwards, for each suspicious file,
    the percentage of its signature hashes found in the database is printed.

    Args:
        testset_path: Directory prefix that is joined with ``"/"`` and the
            file patterns to build the glob expressions.
    """
    db = LmdbPlagDB("/tmp/plagdb.lmdb")

    source_list = glob.glob(testset_path + "/" + source_pattern + "*.txt")
    suspicious_list = glob.glob(testset_path + "/" + suspicious_pattern +
                                "*.txt")

    # create signatures
    # NOTE(review): the loop stops as soon as the 1-based counter equals
    # max_docs, i.e. *before* the max_docs-th file is processed. This keeps
    # the existing behaviour; confirm whether the cap is meant to be inclusive.
    for count, path in enumerate(source_list, 1):
        if count == max_docs:
            break

        ref = PlagReference(path)
        print("Create signature for " + ref.filename)
        sig = sherlock.signature(path)

        # The whole signature is handed over in a single call.
        db.update_batch(sig, ref)

    # check for equal signatures
    for count, path in enumerate(suspicious_list, 1):
        if count == max_docs:
            break

        sig = sherlock.signature(path)
        # Lookups use the string form of each hash.
        same_sig = sum(1 for _hash in sig if db.lookup(str(_hash)) is not None)

        # A file with an empty signature used to abort the whole run with
        # ZeroDivisionError; report 0% for it and carry on.
        total = len(sig)
        percent = 100.0 / total * same_sig if total else 0.0
        print("similarity: %s%%" % str(percent))
コード例 #4
0
ファイル: testsuite.py プロジェクト: flozzone/plagcheck
def main(testset_path):
    """Populate the signature database from source files, then rate suspects.

    Two file lists are built with ``glob``: ``<testset_path>/<source_pattern>*.txt``
    and ``<testset_path>/<suspicious_pattern>*.txt`` (both patterns are
    module-level names). Signatures of the source files are written to the
    LMDB database at ``/tmp/plagdb.lmdb``. For every suspicious file the share
    of its signature hashes that the database already knows is printed as a
    percentage.

    Args:
        testset_path: Directory prefix concatenated with ``"/"`` and the
            patterns to form the glob expressions.
    """
    db = LmdbPlagDB("/tmp/plagdb.lmdb")

    source_list = glob.glob(testset_path + "/" + source_pattern + "*.txt")
    suspicious_list = glob.glob(testset_path + "/" + suspicious_pattern + "*.txt")

    # create signatures
    # NOTE(review): the counter is compared after being advanced, so the loop
    # ends before handling the max_docs-th file. Behaviour is kept as it was;
    # confirm whether max_docs files (rather than max_docs - 1) were intended.
    for position, path in enumerate(source_list, 1):
        if position == max_docs:
            break

        ref = PlagReference(path)
        print("Create signature for " + ref.filename)
        sig = sherlock.signature(path)

        # Store the complete signature with one batch call.
        db.update_batch(sig, ref)

    # check for equal signatures
    for position, path in enumerate(suspicious_list, 1):
        if position == max_docs:
            break

        sig = sherlock.signature(path)
        # The database is queried with the string form of each hash.
        same_sig = sum(1 for _hash in sig if db.lookup(str(_hash)) is not None)

        # An empty signature previously raised ZeroDivisionError and stopped
        # the run; treat it as 0% similarity instead.
        sig_count = len(sig)
        if sig_count:
            percent = 100.0 / sig_count * same_sig
        else:
            percent = 0.0
        print("similarity: %s%%" % str(percent))