Exemple #1
0
    def test_module_level(self):
        s1 = """
def main():
    s = 0
    for j in range(10):
        for i in range(10):
            if i > j:
                s += i + j
    print(s)

if __name__ == '__main__':
    main()
"""
        s2 = """
s = 0
for j in range(10):
    for i in range(10):
        if i > j:
            s += i + j
print(s)
"""
        result = pycode_similar.detect([s1, s2], module_level=True)
        sum_plagiarism_percent, *tail = pycode_similar.summarize(result[0][1])
        # s1.main vs s2.__main__ AND s1.__main__ vs s2.__main__
        self.assertGreater(sum_plagiarism_percent, 0.6)

        result = pycode_similar.detect([s2, s1], module_level=True)
        sum_plagiarism_percent, *tail = pycode_similar.summarize(result[0][1])
        # s2.__main__ vs s1.main
        self.assertGreater(sum_plagiarism_percent, 0.85)
Exemple #2
0
    def test_no_function(self):
        s1 = """
def foo(a):
    c = a
            """
        s2 = """
class B(object):
    pass
            """
        try:
            result = pycode_similar.detect([s2, s1])
        except Exception as ex:
            self.assertEqual(ex.source, 0)
        result = pycode_similar.detect([s1, s2])
        self.assertEqual(result[0][1][0].plagiarism_percent, 0)
Exemple #3
0
    def test_space_and_comments(self):
        s1 = """
def foo(a):
    \"""
    foo comments.
    \"""
    if a >= 1:
        return True
        
    # this should return False    
    return False
            """
        s2 = """
def bar(b):
# bar comments.
    if 1 <= b:
        \"""
        This should
        return True
        \"""
        return True
    return False
            """
        result = pycode_similar.detect([s1, s2])
        self.assertEqual(result[0][1][0].plagiarism_percent, 1)
Exemple #4
0
def processpyfiles():
    """Classify the submission in ``pypath`` against every file in ``path``.

    Both ``path`` and ``pypath`` are module-level directory names. Every file
    in ``pypath`` is read, but only the text of the last one listed is kept
    as the reference. Each file in ``path`` is then scored against that
    reference with ``pycode_similar.detect`` using ``UnifiedDiff``.

    Returns:
        ``"SYNTAX ERROR IN FILE"`` as soon as any comparison raises,
        ``"PLAGIARIZED"`` if the highest score is at least 0.90, and
        ``"NOT PLAGIARIZED"`` otherwise (including when ``path`` holds no
        files, which previously crashed in ``max()``).
    """
    files = os.listdir(path)

    # NOTE(review): when ``pypath`` is empty the reference stays the empty
    # string instead of being an unbound name; confirm how detect() should
    # treat that case.
    txt = ""
    for pyfile in os.listdir(pypath):
        # ``with`` closes the handle even if read() fails.
        with open(os.path.join(pypath, pyfile), encoding="utf8") as submission:
            txt = submission.read()

    code = []
    for name in files:
        with open(os.path.join(path, name), encoding="utf8") as candidate:
            code.append(candidate.read())
    print(code)

    simi_matrix = []
    for candidate_text in code:
        try:
            report = dict(
                pycode_similar.detect(
                    [str(txt), str(candidate_text)],
                    diff_method=pycode_similar.UnifiedDiff))
            simi_matrix.append(report.get(1)[0].plagiarism_percent)
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            return "SYNTAX ERROR IN FILE"

    if simi_matrix and max(simi_matrix) >= .90:
        return "PLAGIARIZED"
    return "NOT PLAGIARIZED"
Exemple #5
0
    def test_expr(self):
        s1 = """
def foo(a):
    yield c
            """
        s2 = """
def bar(b):
    yield a
            """
        result = pycode_similar.detect([s1, s2])
        self.assertEqual(result[0][1][0].plagiarism_percent, 1)
Exemple #6
0
    def test_strip_print(self):
        s1 = """
def foo(a):
    a = b
    print('abc', a)
            """
        s2 = """
def foo(a):
    print('abc', bar())
    a = b
            """

        result = pycode_similar.detect([s1, s2])
        self.assertEqual(result[0][1][0].plagiarism_percent, 1)
Exemple #7
0
    def test_strip_import(self):
        s1 = """
def foo():
    import sys
    from os import path
            """
        s2 = """
def foo():
    import os
    import ast
    from collections import Counter
            """

        result = pycode_similar.detect([s1, s2])
        self.assertEqual(result[0][1][0].plagiarism_percent, 1)
Exemple #8
0
    def test_keep_prints(self):
        s1 = """
a = []
for j in range(10):
    for i in range(10):
        a.append(i - j)
print(a)
print(abs(el) * el for el in a if abs(el) > 2)
"""
        s2 = """
a = [
    i - j
    for j in range(10)
    for i in range(10)
]
print(a)
print(abs(el) * el for el in a if abs(el) > 2)
"""

        result = pycode_similar.detect([s1, s2], module_level=True, keep_prints=False)
        self.assertLess(result[0][1][0].plagiarism_percent, 0.2)

        result = pycode_similar.detect([s1, s2], module_level=True, keep_prints=True)
        self.assertGreater(result[0][1][0].plagiarism_percent, 0.5)
Exemple #9
0
    def test_gte_lte(self):
        s1 = """
def foo(a):
    if a >= 1:
        return True
    return False
            """
        s2 = """
def bar(b):
    if 1 <= b:
        return True
    return False
            """
        result = pycode_similar.detect([s1, s2])
        self.assertEqual(result[0][1][0].plagiarism_percent, 1)
Exemple #10
0
def compare_file_to_others(ref_file, candidate_files):
    """Return the mean per-function plagiarism percentage for the first result.

    The contents of ``ref_file`` followed by every file in
    ``candidate_files`` are passed to ``pycode_similar.detect`` with
    ``UnifiedDiff``; the ``plagiarism_percent`` values of the entries in
    ``res[0][1]`` are averaged.

    Args:
        ref_file: name of the reference file.
        candidate_files: iterable of candidate file names.

    Returns:
        The average ``plagiarism_percent``, or 0 when there are no
        per-function reports (previously a ZeroDivisionError).
    """
    files = [ref_file, *candidate_files]
    print(files)
    payload = [read_file_content(name) for name in files]

    res = pycode_similar.detect(payload,
                                diff_method=pycode_similar.UnifiedDiff)
    per_function_reports = res[0][1]
    if not per_function_reports:
        # Nothing to average; avoid dividing by zero.
        return 0
    total = sum(report.plagiarism_percent for report in per_function_reports)
    return total / len(per_function_reports)
Exemple #11
0
    def test_basic_detect(self):
        s1 = """
def foo(a):
    if a > 1:
        return True
    return False
        """
        s2 = """
class A(object):
    def __init__(self, a):
        self._a = a
        
    def bar(self):
        if self._a > 2:
            return True
        return False
        """
        result = pycode_similar.detect([s1, s2])
        self.assertGreater(result[0][1][0].plagiarism_percent, 0.5)
Exemple #12
0
def main():
    """Compare the two files named on the command line and print a summary.

    ``sys.argv[1]`` is the reference file and ``sys.argv[2]`` the candidate,
    both resolved relative to the current directory. For each result of
    ``pycode_similar.detect`` the plagiarism and total counts are summed and
    formatted; only the summary of the last result is printed (``0`` is
    printed when there are no results).
    """
    filepath1 = './' + sys.argv[1]
    filepath2 = './' + sys.argv[2]

    # Context managers make sure both handles are closed after reading.
    with open(filepath1, 'r') as f1, open(filepath2, 'r') as f2:
        sources = [f1.read(), f2.read()]

    results = pycode_similar.detect(sources)
    result = 0

    for index, func_ast_diff_list in results:
        sum_total_count = sum(func_diff_info.total_count
                              for func_diff_info in func_ast_diff_list)
        sum_plagiarism_count = sum(func_diff_info.plagiarism_count
                                   for func_diff_info in func_ast_diff_list)
        # An empty diff list gives a zero total; report 0 % instead of
        # raising ZeroDivisionError.
        if sum_total_count:
            percent = sum_plagiarism_count / float(sum_total_count) * 100
        else:
            percent = 0.0
        result = (
            '{:.2f} % ({}/{}) of ref code structure is plagiarized by candidate.'
            .format(percent, sum_plagiarism_count, sum_total_count))

    print(result)
    sys.stdout.flush()
def checkPlagiarismForFiles(config, args, f1, f2):
    """Check whether files ``f1`` and ``f2`` are plagiarized from one another.

    When ``args.threshold`` is above 99 the check is delegated to
    ``checkForPlagiarismByDiff``. Otherwise identical file contents count as
    plagiarism outright, and different contents are scored with
    ``pycode_similar.detect`` (``UnifiedDiff``, prints kept, module level
    included) and compared against ``args.threshold`` as a percentage.

    Args:
        config: passed through to ``checkForPlagiarismByDiff``.
        args: object providing ``threshold`` and ``verbose``.
        f1: name of the reference file.
        f2: name of the candidate file.

    Returns:
        True when plagiarism is detected, False when the score is below the
        threshold, and None when the comparison could not be carried out
        (``detect`` raised or returned no result). For thresholds above 99,
        whatever ``checkForPlagiarismByDiff`` returns.
    """
    if args.threshold > 99:
        return checkForPlagiarismByDiff(config, f1, f2)
    c1 = readFile(f1)
    c2 = readFile(f2)
    if c1 == c2:
        print(f'Plagiarism detected: {f1} and {f2} are identical')
        return True
    try:
        res = pycode_similar.detect([c1, c2], diff_method=pycode_similar.UnifiedDiff, keep_prints=True, module_level=True)
    except Exception as e:
        msg = f'Cannot check plagiarism between {f1} and {f2}: ' + str(e)
        # Syntax errors in the inputs are only reported in verbose mode;
        # anything else is a warning.
        if isinstance(e, SyntaxError):
            verbose(msg)
        else:
            warn(msg)
        return None
    if len(res) == 0:
        warn(f'No plagiarism result found for {f1} and {f2}')
        # Without a result there is nothing to summarize; falling through
        # would raise IndexError on res[0].
        return None
    if len(res) > 1:
        warn(f'More than one plagiarism result found for {f1} and {f2}')
    (_, x) = res[0]
    sumPlagiarismFactor, sumPlagiarismCount, sumTotalCount = pycode_similar.summarize(x)
    sumPlagiarismPercent = sumPlagiarismFactor * 100
    msg = '{:.2f} % ({}/{}) of ref code structure is plagiarized by candidate.'.format(
        sumPlagiarismPercent,
        sumPlagiarismCount,
        sumTotalCount,
    )
    if sumPlagiarismPercent >= args.threshold:
        print(f'Detected plagiarism between {f1} and {f2}: {msg}')
        if args.verbose:
            shell.run(['diff', '-u', f1, f2], onError='ignore')
        return True
    verbose(f'No plagiarism between {f1} and {f2}')
    return False
def compare_file_to_others(ref_file, candidate_files):
    """Return the mean per-function plagiarism percentage for the first result.

    Each file is read and test-parsed with ``ast.parse``; candidates that
    raise ``SyntaxError`` are left out of the comparison. The remaining
    sources are passed to ``pycode_similar.detect`` with ``UnifiedDiff`` and
    the ``plagiarism_percent`` values of the entries in ``res[0][1]`` are
    averaged.

    Args:
        ref_file: name of the reference file.
        candidate_files: iterable of candidate file names.

    Returns:
        The average ``plagiarism_percent``; 0 when there are no per-function
        reports or when the reference file itself cannot be parsed.
    """
    files = [ref_file, *candidate_files]
    print(files)
    payload = []
    for position, name in enumerate(files):
        try:
            content = read_file_content(name)
            ast.parse(content)
        except SyntaxError:
            if position == 0:
                # Dropping the reference would silently promote the first
                # candidate to reference; report "no similarity" instead.
                return 0
            continue
        payload.append(content)

    res = pycode_similar.detect(payload,
                                diff_method=pycode_similar.UnifiedDiff)
    per_function_reports = res[0][1]
    if not per_function_reports:
        return 0
    total = sum(report.plagiarism_percent for report in per_function_reports)
    return total / len(per_function_reports)
Exemple #15
0
 def compare(self, a, b):
     """Return ``plagiarism_percent`` of the first entry in the first result for ``a`` vs ``b``."""
     report = pycode_similar.detect([a, b], diff_method=pycode_similar.UnifiedDiff)
     first_entry = report[0][1][0]
     return first_entry.plagiarism_percent
# Set the title and axis labels, then display the figure.
# NOTE(review): `plt` is presumably matplotlib.pyplot, imported outside this view -- confirm.
plt.title('students')
plt.xlabel('time')
plt.ylabel('signal')
plt.show()
# learn to use matplotlib


class SimilarDetector:
    """Thin wrapper that runs ``pycode_similar.detect`` on a stored list of sources."""

    def __init__(self, codes):
        """Store ``codes``, the list later passed to ``pycode_similar.detect``."""
        self.code_list = codes

    def detect(self):
        """Run ``pycode_similar.detect`` with ``UnifiedDiff`` and return its result.

        The result was previously discarded, so callers always got ``None``.
        """
        return pycode_similar.detect(self.code_list,
                                     diff_method=pycode_similar.UnifiedDiff)


# Compare two one-statement functions that differ only in their name,
# using the TreeDiff diff method.
res = pycode_similar.detect(
    ['''
def p():
    print(123)''', '''
def f():
    print(123)'''],
    diff_method=pycode_similar.TreeDiff)
# Second element of the first result: the entries that are summed below.
funcInfos = res[0][1]
# Sum the plagiarism and total counts over all entries and print their ratio.
# NOTE(review): raises ZeroDivisionError if the total count is 0.
sum_plagiarism_count = 0
sum_total_count = 0
for func_info in funcInfos:
    sum_plagiarism_count += func_info.plagiarism_count
    sum_total_count += func_info.total_count
print(sum_plagiarism_count / sum_total_count)
 def detect(self):
     """Run ``pycode_similar.detect`` on ``self.code_list`` and return its result.

     The result was previously discarded, so callers always got ``None``.
     """
     return pycode_similar.detect(self.code_list,
                                  diff_method=pycode_similar.UnifiedDiff)
Exemple #18
0
            OTHER = []
            x_old = measurement[0]
            y_old = measurement[1]
        else:
            x_old = OTHER[-1][0]
            y_old = OTHER[-1][1]

        x = measurement[0]
        y = measurement[1]

        if len(OTHER) >= 3:
            x_old2 = OTHER[-2][0]
            y_old2 = OTHER[-2][1]
        else:
            x_old2 = x_old
            y_old2 = y_old
        bearing = atan2(y - y_old, x - x_old)
        bearing_old = atan2(y_old - y_old2, x_old - x_old2)
        theta = bearing - bearing_old
        d = sqrt((y - y_old) ** 2 + (x - x_old) ** 2)

        x_new = x + d * cos(theta + bearing)
        y_new = y + d * sin(theta + bearing)
        xy_estimate = (x_new, y_new)
        # You must return xy_estimate (x, y), and OTHER (even if it is None)
        # in this order for grading purposes.
        OTHER.append(measurement)
        return xy_estimate, OTHER"""

# Print the raw detect() result, with str1 as the first source and str2 as the second.
print(pycode_similar.detect([str1,str2]))
	optional arguments:
	  -h, --help          show this help message and exit
	  -l L                if AST line of the function >= value then output detail (default: 4)
	  -p P                if plagiarism percentage of the function >= value then output detail (default: 0.5)
	  -k, --keep-prints   keep print nodes
	  -m, --module-level  process module level nodes

	pycode_similar: error: too few arguments

Of course, you can use it as a python library, too.

.. code-block:: python

	import pycode_similar
	pycode_similar.detect([referenced_code_str, candidate_code_str1, candidate_code_str2, ...], diff_method=pycode_similar.UnifiedDiff, keep_prints=False, module_level=False)
	
	
Implementation
--------------
This tool implements two diff methods: a line-based diff (UnifiedDiff) and a tree-edit-distance-based diff (TreeDiff). Both operate at the function AST level.

- UnifiedDiff: diffs the normalized function AST string lines; naive but efficient.
- TreeDiff: diffs the function ASTs; very slow, and the results are poor for small functions. (depends on `zss  <https://pypi.python.org/pypi/zss>`_)

So, when you run this tool from the command line, the default diff method is UnifiedDiff. You can switch to TreeDiff when you use it as a library.


Testing
--------------
If you have the source code you can run the tests with
Exemple #20
0
import pycode_similar

#sys.path.insert(0, '/home/leonard/Faculdades/ESTACIO/Projetos/IC/2017-2018/Luiz/Codigos/src/modules/Plagiarism-Checker-master')

#rom textpreforrkr import main_method
# UnifiedDiff lives in the pycode_similar module; only the module itself is
# imported here, so the bare name raised NameError.
# NOTE(review): detect() is given these two strings as the sources to
# compare, yet they look like file names -- confirm whether the file
# contents should be read and passed instead.
pycode_similar.detect(['Q1_gabarito','Q1_resposta1'], diff_method=pycode_similar.UnifiedDiff)
#main_method(['Q1_gabarito','Q1_resposta1'],'resp')