def convert_to_html_file(notebook_path):
    """Render the notebook stored at ``notebook_path`` into a temporary HTML file.

    The notebook is opened in text mode, parsed with ``_read_notebook``,
    converted with ``_convert_notebook_to_html`` and the resulting HTML is
    handed to ``write_to_file`` together with the object returned by
    ``create_temp_file``.

    :param notebook_path: Path of the notebook file to read.
    :return: The value produced by ``create_temp_file()`` (the destination
        the HTML was written to).
    """
    # The source file is kept open for the whole conversion so that the
    # parsed notebook data stays usable even if it still reads from the file.
    with open(notebook_path, "r") as source:
        parsed = _read_notebook(source)
        rendered = _convert_notebook_to_html(parsed)
        destination = create_temp_file()
        write_to_file(rendered, destination)
        return destination
def test_write_to_file_when_file_does_not_exist(self):
    """``write_to_file`` must return a falsy value when the target is absent."""
    missing = "../out/data.txt"

    # Make sure the file really is absent; it is fine if it never existed.
    try:
        os.remove(missing)
    except FileNotFoundError:
        pass

    outcome = write_to_file(missing)
    self.assertFalse(outcome)
def get_bouquets(input_file_name: str, output_file_name: str):
    """
    Entry point of the bouquet task (invoked from script.py).

    Steps performed:
        1. load bouquets and flowers from the 'input_file_name' file;
        2. run every bouquet through ``handle_bouquet``;
        3. store every truthy result in the 'output_file_name' file.
    """
    total_bouquets, total_flowers = read_from_file(file_name=input_file_name)

    # Bouquets are handled lazily but strictly in input order; results that
    # come back falsy are left out of the output.
    handled = (
        handle_bouquet(bouquet=item, total_flowers=total_flowers)
        for item in total_bouquets
    )
    bouquets_for_writing_to_file = [result for result in handled if result]

    write_to_file(file_name=output_file_name, data=bouquets_for_writing_to_file)
def lsc_driver(cur_loc, log_file):
    '''
    Driver function for the LSC algorithm.

    Every file returned by ``files.get_files_in_dir`` is read and cleaned with
    ``common.beautify``; the first element of each file's content is then
    compared against the first element of every other file with
    ``get_longest_common_substring``. Pairs scoring 70 or more are reported on
    stdout and through ``files.write_to_file``.

    :param cur_loc: Directory location.
    :param log_file: Log file handed to ``files.write_to_file``.
    :return: Square list-of-lists of scores indexed by file position, with
        ``-1`` on the diagonal; an empty list when no files are found.
    '''
    # Function-scope import so this block does not rely on a module-level one.
    import os

    file_list = files.get_files_in_dir(cur_loc)
    if not file_list:
        print('LONGEST COMMON SUBSTRING: No text files found! Exiting.')
        files.write_to_file(
            log_file,
            '\nLONGEST COMMON SUBSTRING: No text files found! Exiting.\n')
        return []

    # os.path.join instead of a hard-coded '\\' so the paths are also valid
    # on non-Windows systems.
    line_list = [
        files.read_lines_in_file(os.path.join(cur_loc, name))
        for name in file_list
    ]
    new_line_list = [
        [common.beautify(text) for text in lines] for lines in line_list
    ]

    print('\n')
    print(new_line_list)

    lsc_matrix = []
    results = []
    count = len(new_line_list)
    for i in range(count):
        row = []
        lsc_matrix.append(row)
        for j in range(count):
            if j < i:
                # Score of (i, j) equals the already computed score of (j, i).
                score = lsc_matrix[j][i]
            elif j == i:
                # Comparing a file with itself is meaningless.
                score = -1
            else:
                # Only element 0 of each file's content takes part in the
                # comparison (presumably its first line -- confirm against
                # files.read_lines_in_file).
                score = get_longest_common_substring(
                    new_line_list[i][0], new_line_list[j][0])
            row.append(score)

            if i != j and score >= 70:
                results.append((file_list[i], file_list[j], score))

    # NOTE(review): the header says SUBSEQUENCE while the "no files" message
    # and the helper say SUBSTRING; kept as-is to avoid changing output.
    print('\n\nLONGEST COMMON SUBSEQUENCE:\n')
    files.write_to_file(log_file, '\n\nLONGEST COMMON SUBSEQUENCE:\n')

    template = ("'{}' and '{}' are similar enough ({}% similarity) "
                "to each other (to suspect plagiarism).")
    for first, second, score in results:
        message = template.format(first, second, score)
        files.write_to_file(log_file, message + '\n')
        print(message)

    return lsc_matrix
def main():
    """Interactive entry point of the plagiarism detector.

    Asks which directory to scan (the current working directory by default)
    and whether to enable logging, runs the bag-of-words and LCS drivers on
    that directory, then prints both result matrices and writes them through
    ``files.write_to_file``.
    """
    # Start from the present working directory.
    cur_dir = os.path.abspath(os.curdir)

    # Greeting banner.
    print('\n\n****************** PLAGIARISM DETECTOR ******************', end='')
    print('\n ******************* SRIKANTH KAVURI *******************\n')

    # The yes/no validation lives in common.proceed(); when the user declines
    # the default, keep asking until an existing directory is entered.
    if not common.proceed('Default directory is ' + cur_dir + '. Continue? '):
        while True:
            candidate = input('Enter new directory: ').strip()
            if os.path.isdir(candidate):
                cur_dir = candidate
                break
            print('Invalid.\n')

    # Logging is optional; an empty name stands for "no log file".
    log_file_name = (
        files.create_log_file(cur_dir)
        if common.proceed('\nEnable logging? ')
        else ''
    )

    print('\n')

    # Run both detectors on the chosen directory.
    bag_matrix = bag_of_words.bag_driver(cur_dir, log_file_name)
    lsc_matrix = lsc.lsc_driver(cur_dir, log_file_name)

    # Show the results.
    print('\nBag of words matrix:')
    print(bag_matrix)
    print('\nLCS matrix:')
    print(lsc_matrix)

    # Mirror the results into the log. Per the original author's note, a
    # missing log file is simply ignored (presumably by files.write_to_file).
    log_chunks = (
        '\nBag of words matrix:\n',
        str(bag_matrix),
        '\nLCS matrix:\n',
        str(lsc_matrix),
        '\n',
    )
    for chunk in log_chunks:
        files.write_to_file(log_file_name, chunk)
def test_write_to_file_when_file_exists(self):
    """``write_to_file`` must return a truthy value when the target exists."""
    existing = "../out/data.txt"

    # Create (or truncate) the file so that it is guaranteed to exist.
    handle = open(existing, 'w')
    handle.close()

    outcome = write_to_file(existing)
    self.assertTrue(outcome)
def bag_driver(cur_dir, log_file):
    '''
    Driver program for bag of words.

    Builds one vector per file with ``create_vector_for_file``, scores every
    pair of files as ``100 * round(get_angle(a, b), 2)`` and reports pairs
    scoring 70 or more on stdout and through ``files.write_to_file``.

    :param cur_dir: Current working directory. This is where the files are.
    :param log_file: Log file handed to ``files.write_to_file``.
    :return: Square list-of-lists of scores indexed by file position, with
        ``-1`` on the diagonal; an empty list when no files are found.
    '''
    # Function-scope import so this block does not rely on a module-level one.
    import os

    # Get all files in cur_dir as a list
    file_list = files.get_files_in_dir(cur_dir)
    if not file_list:
        print('BAG OF WORDS: No text files found! Exiting.')
        files.write_to_file(
            log_file, '\nBAG OF WORDS: No text files found! Exiting.\n')
        return []

    # vecs[k] is the vector built for file_list[k]; the shared index is what
    # ties a vector to its file. os.path.join replaces a hard-coded '\\' so
    # the paths are also valid on non-Windows systems.
    vecs = [
        create_vector_for_file(os.path.join(cur_dir, name))
        for name in file_list
    ]

    angles = []
    results = []
    count = len(file_list)

    # Score every ordered pair of files.
    for i in range(count):
        row = []
        angles.append(row)
        for j in range(count):
            if j < i:
                # Angle between A and B is the same as angle between B and A.
                score = angles[j][i]
            elif j == i:
                # Comparing a file with itself makes no sense.
                score = -1
            else:
                # Calculate angle
                score = 100 * (round(get_angle(vecs[i], vecs[j]), 2))
            row.append(score)

            # Similar enough to suspect plagiarism?
            if i != j and score >= 70:
                results.append((file_list[i], file_list[j], score))

    # Printouts
    print('\nFor your reference:')
    for ind, name in enumerate(file_list):
        print('File number ' + str(ind) + ' corresponds to file ' + name)
    print('\n')

    files.write_to_file(log_file, '\nFor your reference:\n')
    for ind, name in enumerate(file_list):
        files.write_to_file(
            log_file,
            'File number ' + str(ind) + ' corresponds to file ' + name + '\n')
    files.write_to_file(log_file, '\n')

    files.write_to_file(log_file, '\n\nBAG OF WORDS:\n')
    print('\n\nBAG OF WORDS:\n')

    template = ("'{}' and '{}' are similar enough ({}% similarity) "
                "to each other (to suspect plagiarism).")
    for first, second, score in results:
        message = template.format(first, second, score)
        files.write_to_file(log_file, message + '\n')
        print(message)

    return angles