コード例 #1
0
			#print("Error while parsing PDF file!")
			colour_print("Error while parsing PDF file!", core = Fore.RED)
			exit(1)

		end_time = time()
		print("Parsing PDF took {0:.3f}".format(end_time - start_time), "seconds")

	except OSError:
		print("Error while trying to parse pdf file!")
		exit(1)

# Read the contents of input_file through split.get_text and print the elapsed
# wall-clock time of that call.
print("\nStarted parsing TXT, wait for a while...")
start_time = time()
# Earlier variant, kept for reference:
#text = split.get_list(input_file, enableComments = False)
text = split.get_text(input_file)
end_time = time()
print("Parsing TXT took {0:.3f}".format(end_time - start_time), "seconds")

# Build the set of stopwords. An empty stopwords_file name means there is
# nothing to load, so the set stays empty; otherwise the file is read through
# split.get_list and the elapsed time of that read is printed.
if stopwords_file != '':
	start_time = time()
	stopwords = split.get_list(stopwords_file, enableComments=True)
	end_time = time()
	print("\nParsing TXT with stopwords took {0:.3f}".format(end_time - start_time), "seconds")
	# Convert after the timing report, so the reported time covers only the read.
	stopwords = set(stopwords)
else:
	stopwords = set()
コード例 #2
0
ファイル: run_bak.py プロジェクト: coolhomeuc/Plagiarize3
            if (retcode != 0):
                print("Error while parsing input PDF file!")
                exit(1)
    
            end_time = time()
            print("Parsing input PDF took {0:.3f}".format(end_time - start_time), "seconds", end = "\n\n")
    
        except OSError:
            print("Error while parsing input pdf file!")
            exit(1)

    
    # Read the input document's text from its TXT path and print how long the
    # split.get_text call took.
    print("Started parsing input TXT, wait for a while...")
    start_time = time()
    text = split.get_text(input_file.path_txt)
    end_time = time()
    print("Parsing input TXT took {0:.3f}".format(end_time - start_time), "seconds", end = "\n\n")
    # Set the processed flag to 1, call the object's hash(), and store the text on it.
    input_file.processed = 1
    input_file.hash()
    # NOTE(review): this calls split.get_text on the same path a second time,
    # outside the timed region. `text` above may be reusable here -- confirm
    # that input_file.hash() does not alter the file before changing it.
    input_file.text = split.get_text(input_file.path_txt)

    # Build the stopword set. It stays empty when stopwords_file.path_txt is
    # None; otherwise it is filled from the list returned by split.get_list.
    stopwords = set()
    # NOTE(review): `== None` works for plain values, but `is None` is the
    # idiomatic identity check.
    if (stopwords_file.path_txt == None):
        pass
    else:
        print("Started parsing TXT with stopwords, wait for a while...")
        start_time = time()
        # enableComments = True presumably lets the stopwords file carry
        # comment lines -- verify against split.get_list.
        stopwords = split.get_list(stopwords_file.path_txt, enableComments = True)
        stopwords = set(stopwords)