コード例 #1
0
    def load_files_not_first(self, path='static/outfile'):
        """Return whatever ``utilities.load_file`` reads from *path*.

        Args:
            path: Location handed unchanged to ``utilities.load_file``;
                defaults to ``'static/outfile'``.

        Returns:
            The object produced by ``utilities.load_file(path)``, unmodified.
        """
        # ``self`` is not used; this method only delegates to the helper.
        # NOTE(review): keyword_content reads 'static/outfiles' (plural)
        # directly -- confirm the singular default here is intended.
        return utilities.load_file(path)


# # This function extracts the content of an uncleaned txt file,
# # for example the content of a file that the user uploaded.
#     def extract_fromTXT(path):
#
#         #all_files = filenames(path)
#         files_content = []
#
#         for filename in all_files:
#             with open(os.path.join(path, filename)) as file:
#                 content = file.read()
#             content = re.sub(r"<.*?>", "", content)
#             content = re.sub(r"\n", " " , content)
#             content = re.sub(r"\t", " ", content)
#
#             content = re.sub(r"[0-9]", " ", content)
#             content = re.sub(r"\[|\]", " ", content)
#             content = re.sub(r"[\.\(\)\:\|]"," ", content)
#             content = re.sub(r"[\.\(\)\:\|\/\$\;\"\"\&\#\,]", " ", content)
#             #content = re.sub(r" *+ *"," ",content)
#             content = re.sub(r" +", " ", content)
#             content = content.lower()
#
#             files_content.append(content)
#
#         return files_content, filename
コード例 #2
0
def keyword_content(keyword):
    """Return the names of stored files whose content contains *keyword*.

    The contents are read with ``utilities.load_file('static/outfiles')`` and
    the names with ``hp.load_filenames_not_first('static/filenames')``. The
    two loaded sequences are treated as parallel: the name at index ``i`` is
    reported when the content at index ``i`` matches.

    Args:
        keyword: Text to search for. Matching is a case-insensitive
            substring test (both sides are lower-cased).

    Returns:
        A list of the names whose corresponding content contains the
        keyword, in the order the contents are stored.

    Note:
        Presumably both stores hold plain lists of equal length -- verify
        against the code that writes them. If the names are shorter than the
        contents, a match past the end of the names raises ``IndexError``.
    """
    contents_path = 'static/outfiles'
    names_path = 'static/filenames'

    all_contents = utilities.load_file(contents_path)
    all_names = hp.load_filenames_not_first(names_path)

    # Lower-case the keyword once instead of on every iteration.
    needle = keyword.lower()

    # Index into all_names (rather than zip) so a length mismatch between the
    # two stores still surfaces as an error instead of silently dropping hits.
    return [
        all_names[index]
        for index, content in enumerate(all_contents)
        if needle in content.lower()
    ]
コード例 #3
0
 def load_reduced_vectors(self, path='static/reduced_vectors'):
     """Return whatever ``utilities.load_file`` reads from *path*.

     Args:
         path: Location handed unchanged to ``utilities.load_file``;
             defaults to ``'static/reduced_vectors'``.

     Returns:
         The object produced by ``utilities.load_file(path)``, unmodified.
     """
     # ``self`` is not used; this method only delegates to the helper.
     return utilities.load_file(path)
コード例 #4
0
 def load_dim_red_model(self, path='static/PCA'):
     """Return whatever ``utilities.load_file`` reads from *path*.

     Args:
         path: Location handed unchanged to ``utilities.load_file``;
             defaults to ``'static/PCA'``.

     Returns:
         The object produced by ``utilities.load_file(path)``, unmodified.
         Presumably a fitted dimensionality-reduction model -- confirm
         against the code that writes this file.
     """
     # ``self`` is not used; this method only delegates to the helper.
     return utilities.load_file(path)
コード例 #5
0
 def load_vector(self, path='static/vectors'):
     """Return whatever ``utilities.load_file`` reads from *path*.

     Args:
         path: Location handed unchanged to ``utilities.load_file``;
             defaults to ``'static/vectors'``.

     Returns:
         The object produced by ``utilities.load_file(path)``, unmodified.
     """
     # ``self`` is not used; this method only delegates to the helper.
     return utilities.load_file(path)
コード例 #6
0
 def load_vectorizer(self, path="static/vectorizer"):
     """Return whatever ``utilities.load_file`` reads from *path*.

     Args:
         path: Location handed unchanged to ``utilities.load_file``;
             defaults to ``"static/vectorizer"``.

     Returns:
         The object produced by ``utilities.load_file(path)``, unmodified.
     """
     # ``self`` is not used; this method only delegates to the helper.
     return utilities.load_file(path)
コード例 #7
0
 def load_all_top_words_not_first(self, path="static/all_file_topwords"):
     """Return whatever ``utilities.load_file`` reads from *path*.

     Args:
         path: Location handed unchanged to ``utilities.load_file``;
             defaults to ``"static/all_file_topwords"``.

     Returns:
         The object produced by ``utilities.load_file(path)``, unmodified.
     """
     # ``self`` is not used; this method only delegates to the helper.
     return utilities.load_file(path)
コード例 #8
0
 def load_filenames_not_first(self, path="static/filenames"):
     """Return whatever ``utilities.load_file`` reads from *path*.

     Args:
         path: Location handed unchanged to ``utilities.load_file``;
             defaults to ``"static/filenames"``.

     Returns:
         The object produced by ``utilities.load_file(path)``, unmodified.
     """
     # ``self`` is not used; this method only delegates to the helper.
     return utilities.load_file(path)