# Stage 1: read the text of the input file and report the elapsed time.
print("Started getting text from \"{0}\"".format(input_file.path_txt))
start_time = time()
input_file.text = file_process.get_text(input_file)
end_time = time()
print("Getting text from \"{0}\" took {1:.3f}s".format(input_file.path_txt, end_time - start_time), end = "\n\n")

# Stage 2: load the stopwords. When no stopwords file was given
# (path_txt is None) the set simply stays empty.
stopwords = set()
if stopwords_file.path_txt is not None:
    print("Started parsing TXT with stopwords, wait for a while...")
    start_time = time()
    # The set conversion stays inside the timed section, as before.
    stopwords = set(split.get_list(stopwords_file.path_txt, enableComments = True))
    end_time = time()
    print("Parsing TXT with stopwords took {0:.3f}s".format(end_time - start_time), end = "\n\n")

# Stage 3: extract keyword phrases from the input text.
print("Started getting keyword phrases")
start_time = time()
# NOTE: this rebinds the name `keywords` from the object that provides
# getKeyPhrases() to the value it returns, so getKeyPhrases() cannot be
# reached through `keywords` after this line.
keywords = keywords.getKeyPhrases(input_file.text, stopwords, lemmatizer = lemmatize)
# end_time is recorded here, but the elapsed time is not printed in this section.
end_time = time()
input_file.keywords = keywords

# Nothing to work with: report it and stop with a success status.
if len(keywords) == 0:
    print("No keywords were found for an input file \"{0}\"".format(input_file.path_txt))
    exit(0)
# Script section, collapsed onto one physical line (the original line breaks
# and indentation are lost). It starts inside a `try:` block whose header, and
# the `start_time` assignment used by the first timing message, are before
# this line.
#   1. Runs `python ./pdf_import.py` through subprocess.call with `pdf` and
#      `pdf + ".txt"` as arguments; exits with status 1 when the return code
#      is non-zero or when OSError is raised.
#   2. Loads `input_file` with split.get_list(enableComments = False) into
#      `text` and prints the elapsed time.
#   3. Builds `stopwords` as a set: empty when stopwords_file == '', otherwise
#      from split.get_list(stopwords_file, enableComments = True).
# NOTE(review): "python" is presumably resolved through PATH and may not be the
# interpreter running this script - confirm whether sys.executable is wanted.
retcode = subprocess.call(["python", "./pdf_import.py", pdf, pdf + ".txt"]) if (retcode != 0): print("Error while parsing PDF file!") exit(1) end_time = time.time() print("Parsing PDF took {0:.3f}".format(end_time - start_time), "seconds") except OSError: print("Error while trying to parse pdf file!") exit(1) #Getting words from a txt file print("\nStarted parsing TXT, wait for a while...") start_time = time.time() text = split.get_list(input_file, enableComments = False) end_time = time.time() print("Parsing TXT took {0:.3f}".format(end_time - start_time), "seconds") #Getting words for deleting if (stopwords_file == ''): stopwords = set() pass else: #print("\nStarted parsing TXT with stopwords, wait for a while...") start_time = time.time() stopwords = split.get_list(stopwords_file, enableComments = True) end_time = time.time() print("\nParsing TXT with stopwords took {0:.3f}".format(end_time - start_time), "seconds") stopwords = set(stopwords)
# Script section, collapsed onto one physical line (the original line breaks
# and indentation are lost). It ends on an `else:` whose body is past this line.
#   1. Reads `input_file` with split.get_text into `text` and prints the
#      elapsed time (the older split.get_list call is left commented out).
#   2. Builds `stopwords` as a set: empty when stopwords_file == '', otherwise
#      from split.get_list(stopwords_file, enableComments = True).
#   3. Calls keywords.getKeyPhrases(text, stopwords, lemmatizer = lemmatizer),
#      rebinding the name `keywords` to the result, and prints the elapsed time.
#   4. When output_file == '', prints the header "Keywords (generated by RAKE):"
#      followed by key[0] for every entry of the result.
#Getting words from a txt file print("\nStarted parsing TXT, wait for a while...") start_time = time() #text = split.get_list(input_file, enableComments = False) text = split.get_text(input_file) end_time = time() print("Parsing TXT took {0:.3f}".format(end_time - start_time), "seconds") #Getting words for deleting if (stopwords_file == ''): stopwords = set() pass else: #print("\nStarted parsing TXT with stopwords, wait for a while...") start_time = time() stopwords = split.get_list(stopwords_file, enableComments = True) end_time = time() print("\nParsing TXT with stopwords took {0:.3f}".format(end_time - start_time), "seconds") stopwords = set(stopwords) print("\nStarted getting keyword phrases") start_time = time() keywords = keywords.getKeyPhrases(text, stopwords, lemmatizer = lemmatizer) end_time = time() print("Getting keyword phrases took {0:.3f}".format(end_time - start_time), "seconds") if (output_file == ''): print("\nKeywords (generated by RAKE):\n") for key in keywords: print(key[0]) else: