Beispiel #1
0
def convert_to_html_file(notebook_path):
    """Render the notebook at ``notebook_path`` to HTML in a temporary file.

    The notebook is opened in text mode, parsed with ``_read_notebook``,
    converted with ``_convert_notebook_to_html``, and the resulting HTML is
    handed to ``write_to_file`` together with the object obtained from
    ``create_temp_file``.

    :param notebook_path: Path of the notebook file to read.
    :return: The value returned by ``create_temp_file``, after the HTML has
        been written to it.
    """
    with open(notebook_path, "r") as source:
        # Parsing, conversion and writing all happen while the source file
        # is still open; it is closed when the ``with`` block is left.
        rendered = _convert_notebook_to_html(_read_notebook(source))

        target = create_temp_file()
        write_to_file(rendered, target)

        return target
Beispiel #2
0
    def test_write_to_file_when_file_does_not_exist(self):
        """write_to_file must return a falsy value for a missing file."""
        missing_path = "../out/data.txt"

        # Guarantee the precondition: the target file must not exist.
        try:
            os.remove(missing_path)
        except FileNotFoundError:
            # Already absent, which is exactly the state this test needs.
            pass

        outcome = write_to_file(missing_path)
        self.assertFalse(outcome)
Beispiel #3
0
def get_bouquets(input_file_name: str, output_file_name: str):
    """
    Entry point of the task (called from script.py).

    Processing steps:
        1. load bouquets and flowers from the 'input_file_name' file.
        2. run every bouquet through 'handle_bouquet' together with the
           flowers, keeping only the truthy results.
        3. write the kept results into the 'output_file_name' file.
    """
    total_bouquets, total_flowers = read_from_file(file_name=input_file_name)

    # Lazily handle the bouquets one by one, in their original order.
    handled = (
        handle_bouquet(bouquet=bouquet, total_flowers=total_flowers)
        for bouquet in total_bouquets
    )
    # Falsy results (bouquets that could not be handled) are left out.
    kept = [result for result in handled if result]

    write_to_file(file_name=output_file_name, data=kept)
Beispiel #4
0
def lsc_driver(cur_loc, log_file):
    '''
    Driver function for the LSC algorithm.

    Reads every file reported by files.get_files_in_dir(cur_loc), cleans its
    lines with common.beautify and scores every pair of files with
    get_longest_common_substring. Pairs scoring 70 or more are printed and
    written to the log as suspected plagiarism.

    :param cur_loc: Directory location of the files to compare.
    :param log_file: Log file handed to files.write_to_file for every
        message that is also printed.
    :return: Square list-of-lists matrix where entry [i][j] is the score of
        file i against file j (mirrored, so [i][j] == [j][i]) and the
        diagonal holds -1. An empty list when no files were found.
    '''
    # Local import keeps this block self-contained.
    import os

    file_list = files.get_files_in_dir(cur_loc)

    if not file_list:
        print('LONGEST COMMON SUBSTRING: No text files found! Exiting.')
        files.write_to_file(
            log_file,
            '\nLONGEST COMMON SUBSTRING: No text files found! Exiting.\n')
        return []

    # os.path.join instead of a hard-coded backslash so the driver also
    # works on non-Windows systems.
    line_list = [
        files.read_lines_in_file(os.path.join(cur_loc, name))
        for name in file_list
    ]

    new_line_list = [list(map(common.beautify, lines)) for lines in line_list]
    print('\n')
    print(new_line_list)

    lsc_matrix = []
    results = []
    size = len(new_line_list)
    for i in range(size):
        row = []
        for j in range(size):
            if j < i:
                # Score of (i, j) equals score of (j, i): reuse the value
                # computed for the already finished row j.
                row.append(lsc_matrix[j][i])
            elif j == i:
                # Comparing a file with itself makes no sense.
                row.append(-1)
            else:
                # Only the first element of each file's cleaned line list
                # takes part in the comparison.
                score = get_longest_common_substring(new_line_list[i][0],
                                                     new_line_list[j][0])
                row.append(score)

                # Each unordered pair is reported once (only when i < j).
                if score >= 70:
                    results.append((file_list[i], file_list[j], score))
        lsc_matrix.append(row)

    print('\n\nLONGEST COMMON SUBSEQUENCE:\n')
    files.write_to_file(log_file, '\n\nLONGEST COMMON SUBSEQUENCE:\n')

    for first, second, score in results:
        # Build the message once; the log copy gets a trailing newline.
        message = ('\'' + first + '\' and \'' + second +
                   '\' are similar enough (' + str(score) +
                   '% similarity) to each other (to suspect plagiarism).')
        files.write_to_file(log_file, message + '\n')
        print(message)

    return lsc_matrix
Beispiel #5
0
def main():
    """Interactive entry point of the plagiarism detector.

    Asks which directory to scan (defaulting to the current working
    directory) and whether logging is wanted, runs the bag-of-words and LCS
    drivers on that directory, then prints both result matrices and writes
    them to the log file.
    """
    # Start from the present working directory.
    cur_dir = os.path.abspath(os.curdir)

    # Greeting message
    print('\n\n****************** PLAGIARISM DETECTOR ******************',
          end='')
    print('\n ******************* SRIKANTH KAVURI *******************\n')

    # The yes/no prompt itself lives in common.proceed().
    if not common.proceed('Default directory is ' + cur_dir + '. Continue? '):
        # Keep asking until the user names an existing directory.
        while True:
            candidate = input('Enter new directory: ').strip()
            if os.path.isdir(candidate):
                cur_dir = candidate
                break
            print('Invalid.\n')

    # Logging is optional; an empty name stands for "no log file".
    if common.proceed('\nEnable logging? '):
        log_file_name = files.create_log_file(cur_dir)
    else:
        log_file_name = ''

    print('\n')

    # Run both detectors on the chosen directory.
    bag_matrix = bag_of_words.bag_driver(cur_dir, log_file_name)
    lsc_matrix = lsc.lsc_driver(cur_dir, log_file_name)

    # Print results
    print('\nBag of words matrix:')
    print(bag_matrix)
    print('\nLCS matrix:')
    print(lsc_matrix)

    # Write the same results to the log file, if it exists.
    # If the log file doesn't exist, then the error is just ignored.
    log_chunks = (
        '\nBag of words matrix:\n',
        str(bag_matrix),
        '\nLCS matrix:\n',
        str(lsc_matrix),
        '\n',
    )
    for chunk in log_chunks:
        files.write_to_file(log_file_name, chunk)
Beispiel #6
0
    def test_write_to_file_when_file_exists(self):
        """write_to_file must return a truthy value for an existing file."""
        existing_path = "../out/data.txt"

        # Create (or truncate) the file so that it is guaranteed to exist.
        open(existing_path, 'w').close()

        outcome = write_to_file(existing_path)
        self.assertTrue(outcome)
Beispiel #7
0
def bag_driver(cur_dir, log_file):
    '''
    Driver program for bag of words.

    Builds one vector per file with create_vector_for_file, scores every
    pair of vectors with get_angle (scaled to a percentage) and reports the
    pairs scoring 70 or more as suspected plagiarism, both on stdout and via
    files.write_to_file.

    :param cur_dir: Current working directory. This is where the files are.
    :param log_file: Log file handed to files.write_to_file for every
        message that is also printed.
    :return: Square list-of-lists matrix where entry [i][j] is the score of
        file i against file j (mirrored, so [i][j] == [j][i]) and the
        diagonal holds -1. An empty list when no files were found.
    '''
    # Local import keeps this block self-contained.
    import os

    # Get all files in cur_dir as a list
    file_list = files.get_files_in_dir(cur_dir)

    if not file_list:
        print('BAG OF WORDS: No text files found! Exiting.')
        files.write_to_file(log_file,
                            '\nBAG OF WORDS: No text files found! Exiting.\n')
        return []

    # vecs[k] is the vector of file_list[k]; keeping both lists in the same
    # order is how a vector is traced back to its file.
    # os.path.join instead of a hard-coded backslash so the driver also
    # works on non-Windows systems.
    vecs = [
        create_vector_for_file(os.path.join(cur_dir, name))
        for name in file_list
    ]

    angles = []
    results = []

    # Get the score between every pair of vectors.
    size = len(file_list)
    for i in range(size):
        row = []
        for j in range(size):
            if j < i:
                # Angle between A and B is the same as angle between B and A:
                # reuse the value from the already finished row j.
                row.append(angles[j][i])
            elif j == i:
                # Comparing a file with itself makes no sense.
                row.append(-1)
            else:
                # NOTE(review): scaling after rounding can reintroduce float
                # noise (e.g. 100 * 0.57); kept as-is so returned values do
                # not change.
                score = 100 * (round(get_angle(vecs[i], vecs[j]), 2))
                row.append(score)

                # Similar enough to suspect plagiarism? Each unordered pair
                # is reported once (only when i < j).
                if score >= 70:
                    results.append((file_list[i], file_list[j], score))
        angles.append(row)

    # Index-to-file legend, needed to read the returned matrix.
    legend = [
        'File number ' + str(index) + ' corresponds to file ' + name
        for index, name in enumerate(file_list)
    ]

    # Printouts
    print('\nFor your reference:')
    for entry in legend:
        print(entry)
    print('\n')

    files.write_to_file(log_file, '\nFor your reference:\n')
    for entry in legend:
        files.write_to_file(log_file, entry + '\n')
    files.write_to_file(log_file, '\n')
    files.write_to_file(log_file, '\n\nBAG OF WORDS:\n')

    print('\n\nBAG OF WORDS:\n')

    for first, second, score in results:
        # Build the message once; the log copy gets a trailing newline.
        message = ('\'' + first + '\' and \'' + second +
                   '\' are similar enough (' + str(score) +
                   '% similarity) to each other (to suspect plagiarism).')
        files.write_to_file(log_file, message + '\n')
        print(message)

    return angles