def load_files_not_first(self, path='static/outfile'):
    """Return the object that ``utilities.load_file`` reads from ``path``.

    Args:
        path: Location handed unchanged to ``utilities.load_file``.

    Returns:
        Whatever ``utilities.load_file(path)`` returns.
    """
    return utilities.load_file(path)

# --- Disabled legacy helper (commented out; kept for reference only) ---
#
# This function extracts the content of an uncleaned txt file,
# for example the content of a file that the user uploaded.
# def extract_fromTXT(path):
#
#     # all_files = filenames(path)
#     files_content = []
#     # for filename in all_files:
#     with open(os.path.join(path, filename)) as file:
#         content = file.read()
#         content = re.sub(r"<.*?>", "", content)
#         content = re.sub(r"\n", " " , content)
#         content = re.sub(r"\t", " ", content)
#         # content = re.sub(r"[0-9]", " ", content)
#         content = re.sub(r"\[|\]", " ", content)
#         content = re.sub(r"[\.\(\)\:\|]"," ", content)
#         content = re.sub(r"[\.\(\)\:\|\/\$\;\"\"\&\#\,]", " ", content)
#         # content = re.sub(r" *+ *"," ",content)
#         content = re.sub(r" +", " ", content)
#         content = content.lower()
#
#     files_content.append(content)
#
#     return files_content, filename
def keyword_content(keyword):
    """Return the names of stored files whose content contains ``keyword``.

    File contents are read with ``utilities.load_file('static/outfiles')`` and
    file names with ``hp.load_filenames_not_first('static/filenames')``.
    Matching is a case-insensitive substring test: both the keyword and each
    content entry are lowercased before comparison.

    Args:
        keyword: Text to search for; must support ``.lower()``.

    Returns:
        A list with the name at each position whose content matched, in the
        order the contents were loaded.

    Raises:
        IndexError: If a matching content entry has no name at the same
            position (the two sequences are assumed to be index-aligned --
            TODO confirm against the code that writes them).
    """
    # NOTE(review): the sibling loader ``load_files_not_first`` defaults to
    # 'static/outfile' (singular) -- confirm which path is the intended one.
    contents_path = 'static/outfiles'
    names_path = 'static/filenames'

    contents = utilities.load_file(contents_path)
    names = hp.load_filenames_not_first(names_path)

    # Lowercase the keyword once instead of once per document.
    needle = keyword.lower()

    return [
        names[position]
        for position, text in enumerate(contents)
        if needle in text.lower()
    ]
def load_reduced_vectors(self, path='static/reduced_vectors'):
    """Return the object that ``utilities.load_file`` reads from ``path``.

    Args:
        path: Location handed unchanged to ``utilities.load_file``.

    Returns:
        Whatever ``utilities.load_file(path)`` returns.
    """
    return utilities.load_file(path)
def load_dim_red_model(self, path='static/PCA'):
    """Return the object that ``utilities.load_file`` reads from ``path``.

    Args:
        path: Location handed unchanged to ``utilities.load_file``.

    Returns:
        Whatever ``utilities.load_file(path)`` returns (presumably the fitted
        dimensionality-reduction model -- verify against the code that saves it).
    """
    return utilities.load_file(path)
def load_vector(self, path='static/vectors'):
    """Return the object that ``utilities.load_file`` reads from ``path``.

    Args:
        path: Location handed unchanged to ``utilities.load_file``.

    Returns:
        Whatever ``utilities.load_file(path)`` returns.
    """
    return utilities.load_file(path)
def load_vectorizer(self, path="static/vectorizer"):
    """Return the object that ``utilities.load_file`` reads from ``path``.

    Args:
        path: Location handed unchanged to ``utilities.load_file``.

    Returns:
        Whatever ``utilities.load_file(path)`` returns.
    """
    return utilities.load_file(path)
def load_all_top_words_not_first(self, path="static/all_file_topwords"):
    """Return the object that ``utilities.load_file`` reads from ``path``.

    Args:
        path: Location handed unchanged to ``utilities.load_file``.

    Returns:
        Whatever ``utilities.load_file(path)`` returns.
    """
    return utilities.load_file(path)
def load_filenames_not_first(self, path="static/filenames"):
    """Return the object that ``utilities.load_file`` reads from ``path``.

    Args:
        path: Location handed unchanged to ``utilities.load_file``.

    Returns:
        Whatever ``utilities.load_file(path)`` returns.
    """
    return utilities.load_file(path)