def zip_workspace(self) -> str:
    """Sends a zip file containing a pickle file of session & its folder.

    :return: the path of the zipped workspace
    """
    # TODO: move this to matrix model
    # initialize the save path
    save_path = os.path.join(
        constants.UPLOAD_FOLDER, constants.WORKSPACE_DIR)
    rounded_next_id = str(self.next_id % 10000)  # take the last 4 digits
    workspace_file_path = os.path.join(
        constants.UPLOAD_FOLDER,
        rounded_next_id + '_' + constants.WORKSPACE_FILENAME)

    # remove unnecessary content in the workspace
    try:
        # attempt to remove result folder (CSV matrices and the like)
        shutil.rmtree(
            os.path.join(session_manager.session_folder(),
                         constants.RESULTS_FOLDER))
    except FileNotFoundError:
        pass

    # move session folder to work space folder
    try:
        # try to remove previous workspace in order to resolve conflict
        os.remove(workspace_file_path)
    except FileNotFoundError:
        pass
    try:
        # empty the save path in order to resolve conflict
        shutil.rmtree(save_path)
    except FileNotFoundError:
        pass
    general_functions.copy_dir(session_manager.session_folder(), save_path)

    # save session in the work space folder
    session_manager.save(save_path)

    # zip the dir — the context manager guarantees the archive is closed
    # even if zip_dir raises (the original leaked the handle on error).
    with zipfile.ZipFile(workspace_file_path, 'w') as zip_file:
        general_functions.zip_dir(save_path, zip_file)

    # remove the original dir
    shutil.rmtree(save_path)
    return workspace_file_path
def get_download_path(self) -> str:
    """Write the generated top word results to an output CSV file.

    :return: path of the generated CSV file.
    """
    # Generate the topword result before touching the filesystem.
    topword_result = self._get_result()

    # Build the saving directory for this session and create it if absent.
    result_folder_path = os.path.join(
        session_manager.session_folder(), RESULTS_FOLDER)
    if not os.path.isdir(result_folder_path):
        os.makedirs(result_folder_path)

    save_path = os.path.join(result_folder_path, TOPWORD_CSV_FILE_NAME)

    with open(save_path, 'w', encoding='utf-8') as file:
        # Header line comes first.
        file.write(topword_result.header + '\n')
        # Indexes and data belong in rows, so each frame is transposed
        # before being serialized to CSV.
        for result in topword_result.results:
            csv_text = pd.DataFrame(result).transpose().to_csv(header=True)
            file.write(csv_text)

    return save_path
def get_download_path(self) -> str: """Write the generated top word results to an output CSV file. :return: path of the generated CSV file. """ # Get topword result. topword_result = self._get_result() # Get the default saving directory of topword result. result_folder_path = os.path.join( session_manager.session_folder(), RESULTS_FOLDER) # Attempt to make the directory. if not os.path.isdir(result_folder_path): os.makedirs(result_folder_path) # Get the complete saving path of topword result. save_path = os.path.join(result_folder_path, TOPWORD_CSV_FILE_NAME) # Write to the file. with open(save_path, 'w', encoding='utf-8') as file: # Write header to the file. file.write(topword_result.header + '\n') # Write results to the file. # Since we want indexes and data in rows, we get the transpose. for result in topword_result.results: file.write( pd.DataFrame(result).transpose().to_csv(header=True)) return save_path
def generate_rw_matrix(data_list):
    """Generate the rolling-windows raw data matrix as a CSV file.

    Row j of the output holds the j-th value of every series in
    ``data_list``, each value followed by a delimiter (the trailing
    delimiter matches the original output format).

    Args:
        data_list: a list of equal-length series of [x, y] point values.

    Returns:
        Output file path and extension.
    """
    extension = '.csv'
    deliminator = ','
    folder_path = path_join(session_manager.session_folder(),
                            constants.RESULTS_FOLDER)
    if not os.path.isdir(folder_path):
        makedirs(folder_path)
    out_file_path = path_join(folder_path, 'RWresults' + extension)

    with open(out_file_path, 'w', encoding='utf-8') as out_file:
        # One join per row replaces the original per-cell string
        # concatenation, which was quadratic in the number of series.
        # assumes all series have the same length — TODO confirm (the
        # original indexed every series by data_list[0]'s length too).
        for j in range(len(data_list[0])):
            row = ''.join(str(series[j]) + deliminator
                          for series in data_list)
            out_file.write(row + '\n')
    # The redundant out_file.close() was removed: the with-statement
    # already closes the file.
    return out_file_path, extension
def zip_workspace(self) -> str: """Sends a zip file containing a pickle file of session & its folder. :return: the path of the zipped workspace """ # TODO: move this to matrix model # initialize the save path save_path = os.path.join(constants.UPLOAD_FOLDER, constants.WORKSPACE_DIR) rounded_next_id = str(self.next_id % 10000) # take the last 4 digit workspace_file_path = os.path.join( constants.UPLOAD_FOLDER, rounded_next_id + '_' + constants.WORKSPACE_FILENAME) # remove unnecessary content in the workspace try: shutil.rmtree( os.path.join(session_manager.session_folder(), constants.RESULTS_FOLDER)) # attempt to remove result folder(CSV matrix that kind of crap) except FileNotFoundError: pass # move session folder to work space folder try: # try to remove previous workspace in order to resolve conflict os.remove(workspace_file_path) except FileNotFoundError: pass try: # empty the save path in order to resolve conflict shutil.rmtree(save_path) except FileNotFoundError: pass general_functions.copy_dir(session_manager.session_folder(), save_path) # save session in the work space folder session_manager.save(save_path) # zip the dir zip_file = zipfile.ZipFile(workspace_file_path, 'w') general_functions.zip_dir(save_path, zip_file) zip_file.close() # remove the original dir shutil.rmtree(save_path) return workspace_file_path
def update_workspace(self):
    """Updates the whole work space."""
    # Recompute each file's save path so it points into the current
    # session's contents folder.
    contents_folder = pathjoin(session_manager.session_folder(),
                               constants.FILE_CONTENTS_FOLDER)
    for l_file in list(self.files.values()):
        l_file.save_path = pathjoin(contents_folder,
                                    str(l_file.id) + '.txt')
    # Reload the session itself.
    session_manager.load()
def update_workspace(self):
    """Updates the whole work space."""
    # update the savepath of each file
    # BUG FIX: the original assigned to ``l_file.savePath`` (camelCase),
    # which silently created a brand-new attribute instead of updating
    # the ``save_path`` attribute that LexosFile.__init__ defines and the
    # rest of the code reads.
    for l_file in list(self.files.values()):
        l_file.save_path = pathjoin(
            session_manager.session_folder(),
            constants.FILE_CONTENTS_FOLDER,
            str(l_file.id) + '.txt')
    # update the session
    session_manager.load()
def big_pca():
    """Reads the big image of the PCA and displays it on the web browser.

    :return: a response object with the big PCA as a png to flask and
        eventually to the browser.
    """
    # Serve the image only when a filename is configured; otherwise the
    # function implicitly returns None, exactly as before.
    if constants.PCA_BIG_GRAPH_FILENAME:
        results_folder = path_join(session_manager.session_folder(),
                                   constants.RESULTS_FOLDER)
        image_path = os.path.join(results_folder,
                                  constants.PCA_BIG_GRAPH_FILENAME)
        return send_file(image_path)
def handle_upload_workspace(self):
    """Handles the session when you upload a workspace (.lexos) file."""
    # save .lexos file
    save_path = os.path.join(constants.UPLOAD_FOLDER,
                             constants.WORKSPACE_DIR)
    save_file = os.path.join(save_path, str(self.next_id) + '.zip')
    try:
        os.makedirs(save_path)
    except FileExistsError:
        pass
    # Context manager closes the handle even if the write fails; the
    # original open/write/close leaked it on error.
    with open(save_file, 'wb') as f:
        f.write(request.data)

    # clean the session folder
    shutil.rmtree(session_manager.session_folder())

    # extract the zip
    upload_session_path = os.path.join(
        constants.UPLOAD_FOLDER,
        str(self.next_id) + '_upload_work_space_folder')
    with zipfile.ZipFile(save_file) as zf:
        zf.extractall(upload_session_path)
    general_functions.copy_dir(upload_session_path,
                               session_manager.session_folder())

    # remove temp
    shutil.rmtree(save_path)
    shutil.rmtree(upload_session_path)

    try:
        # if there is no file content folder make one.
        # this dir will be lost during download(zip) if your original file
        # content folder does not contain anything.
        os.makedirs(
            os.path.join(session_manager.session_folder(),
                         constants.FILE_CONTENTS_FOLDER))
    except FileExistsError:
        pass
def handle_upload_workspace(self): """Handles the session when you upload a workspace (.lexos) file.""" # save .lexos file save_path = os.path.join(constants.UPLOAD_FOLDER, constants.WORKSPACE_DIR) save_file = os.path.join(save_path, str(self.next_id) + '.zip') try: os.makedirs(save_path) except FileExistsError: pass f = open(save_file, 'wb') f.write(request.data) f.close() # clean the session folder shutil.rmtree(session_manager.session_folder()) # extract the zip upload_session_path = os.path.join( constants.UPLOAD_FOLDER, str( self.next_id) + '_upload_work_space_folder') with zipfile.ZipFile(save_file) as zf: zf.extractall(upload_session_path) general_functions.copy_dir(upload_session_path, session_manager.session_folder()) # remove temp shutil.rmtree(save_path) shutil.rmtree(upload_session_path) try: # if there is no file content folder make one. # this dir will be lost during download(zip) if your original file # content folder does not contain anything. os.makedirs(os.path.join(session_manager.session_folder(), constants.FILE_CONTENTS_FOLDER)) except FileExistsError: pass
def k_means_image():
    """Reads the png image of the kmeans and displays it on the web browser.

    *kmeansimage() linked to in analysis.html, displaying the
    kmeansimage.png

    :return: a response object with the kmeansimage png to flask and
        eventually to the browser.
    """
    # kmeansimage() is called in kmeans.html, displaying the
    # KMEANS_GRAPH_FILENAME (if session['kmeansdatagenerated'] != False).
    graph_path = path_join(session_manager.session_folder(),
                           constants.RESULTS_FOLDER,
                           constants.KMEANS_GRAPH_FILENAME)
    return send_file(graph_path)
def save_file_manager(file_manager: FileManager):
    """Saves the file manager to the hard drive.

    Args:
        file_manager: File manager object to be saved.

    Returns:
        None
    """
    file_manager_path = os.path.join(session_folder(),
                                     constants.FILEMANAGER_FILENAME)
    # Context manager: the original handed a bare open() to pickle.dump,
    # leaving the file handle to be closed only whenever the GC ran.
    with open(file_manager_path, 'wb') as file_manager_file:
        pickle.dump(file_manager, file_manager_file)
def __init__(self):
    """Class for object to hold info about user's files & choices in Lexos.

    Each user will have their own unique instance of the FileManager. A
    major data attribute of this class is a dictionary holding the
    LexosFile objects, each representing an uploaded file to be used in
    Lexos. The key for the dictionary is the unique ID of the file, with
    the value being the corresponding LexosFile object.
    """
    # Mapping of file ID -> LexosFile object.
    self._files = {}
    # Next unique ID to hand to an uploaded file.
    self.next_id = 0

    # exist_ok=True mirrors the try/except FileExistsError pattern used
    # elsewhere in this module: constructing a FileManager for a session
    # whose contents folder already exists must not crash.
    makedirs(pathjoin(session_manager.session_folder(),
                      constants.FILE_CONTENTS_FOLDER),
             exist_ok=True)
def scrub_contents(self, saving_changes: bool) -> str:
    """Scrubs the contents of the file according to the user's options.

    May save the changes or not.

    :param saving_changes: boolean saying whether or not to save the
        changes made.
    :return: a preview string of the possibly changed file.
    """
    # Collect the storage options the user asked to reuse from cache.
    # Form keys containing 'usecache' carry the option name after that
    # prefix.
    storage_options = []
    for key in list(request.form.keys()):
        if 'usecache' in key:
            storage_options.append(key[len('usecache'):])
    # Make sure a scrub options slot exists before get_scrub_options
    # reads/writes it.
    if 'scrub' not in self.options:
        self.options['scrub'] = {}
    scrub_options = self.get_scrub_options()
    # Load the full text and hand it, with every checkbox option, to the
    # scrubber.  previewing is the inverse of saving_changes: a dry run
    # unless the user asked to apply.
    text_strfile_managering = self.load_contents()
    text_string = scrubber.scrub(
        text_strfile_managering,
        gutenberg=self.is_gutenberg,
        lower=scrub_options['lowercasebox'],
        punct=scrub_options['punctuationbox'],
        apos=scrub_options['aposbox'],
        hyphen=scrub_options['hyphensbox'],
        amper=scrub_options['ampersandbox'],
        digits=scrub_options['digitsbox'],
        tags=scrub_options['tagbox'],
        white_space=scrub_options['whitespacebox'],
        spaces=scrub_options['spacesbox'],
        tabs=scrub_options['tabsbox'],
        new_lines=scrub_options['newlinesbox'],
        opt_uploads=request.files,
        storage_options=storage_options,
        storage_folder=session_manager.session_folder() + '/scrub/',
        previewing=not saving_changes)
    if saving_changes:
        self.save_contents(text_string)
        self.save_scrub_options()
    # renew the preview
    self.contents_preview = self.generate_preview(text_string)
    text_string = self.contents_preview
    return text_string
def scrub_contents(self, saving_changes: bool) -> str:
    """Scrubs the contents of the file according to the user's options.

    May save the changes or not.

    :param saving_changes: boolean saying whether or not to save the
        changes made.
    :return: a preview string of the possibly changed file.
    """
    cache_prefix = 'usecache'
    # Storage options are form keys carrying the 'usecache' marker; the
    # option name follows the marker.
    cache_options = [key[len(cache_prefix):]
                     for key in request.form.keys()
                     if cache_prefix in key]

    # Make sure a scrub-options slot exists before reading it.
    self.options.setdefault('scrub', {})
    scrub_options = self.get_scrub_options()

    raw_text = self.load_contents()
    # Run the scrubber with every checkbox option; previewing is the
    # inverse of saving_changes (dry run unless the user applies).
    scrubbed_text = scrubber.scrub(
        raw_text,
        gutenberg=self.is_gutenberg,
        lower=scrub_options['lowercasebox'],
        punct=scrub_options['punctuationbox'],
        apos=scrub_options['aposbox'],
        hyphen=scrub_options['hyphensbox'],
        amper=scrub_options['ampersandbox'],
        digits=scrub_options['digitsbox'],
        tags=scrub_options['tagbox'],
        white_space=scrub_options['whitespacebox'],
        spaces=scrub_options['spacesbox'],
        tabs=scrub_options['tabsbox'],
        new_lines=scrub_options['newlinesbox'],
        opt_uploads=request.files,
        storage_options=cache_options,
        storage_folder=session_manager.session_folder() + '/scrub/',
        previewing=not saving_changes)

    if saving_changes:
        self.save_contents(scrubbed_text)
        self.save_scrub_options()

    # Renew the preview and return it.
    self.contents_preview = self.generate_preview(scrubbed_text)
    return self.contents_preview
def __init__(self):
    """Class for object to hold info about user's files & choices in Lexos.

    Each user will have their own unique instance of the FileManager. A
    major data attribute of this class is a dictionary holding the
    LexosFile objects, each representing an uploaded file to be used in
    Lexos. The key for the dictionary is the unique ID of the file, with
    the value being the corresponding LexosFile object.
    """
    # Mapping of file ID -> LexosFile object.
    self._files = {}
    # Next unique ID to hand to an uploaded file.
    self.next_id = 0
    # NOTE(review): makedirs raises FileExistsError if the contents
    # folder already exists; sibling code wraps the same call in
    # try/except — confirm this constructor only ever runs on a fresh
    # session folder.
    makedirs(
        pathjoin(session_manager.session_folder(),
                 constants.FILE_CONTENTS_FOLDER))
def load_file_manager() -> FileManager:
    """Loads the file manager for the specific session from the hard drive.

    Args:
        None

    Returns:
        The file manager object for the session.
    """
    file_manager_path = os.path.join(session_folder(),
                                     constants.FILEMANAGER_FILENAME)
    # Context manager closes the pickle file promptly; the original
    # relied on the GC to close the handle from the bare open().
    # NOTE: pickle.load is acceptable only because this file is written
    # by save_file_manager; never point this at untrusted data.
    with open(file_manager_path, 'rb') as file_manager_file:
        file_manager = pickle.load(file_manager_file)
    return file_manager
def generate_rw_matrix_plot(data_points: List[List[List[int]]],
                            legend_labels_list: List[str]
                            ) -> Tuple[str, str]:
    """Generates rolling windows graph raw data matrix.

    Args:
        data_points: a list of [x, y] points
        legend_labels_list: legend labels; element 0 is a '#'-separated
            string of labels and is REPLACED IN PLACE with its split list
            (side effect visible to the caller).

    Returns:
        Output file path and extension.
    """
    extension = '.csv'
    deliminator = ','
    folder_path = path_join(session_manager.session_folder(),
                            constants.RESULTS_FOLDER)
    if not os.path.isdir(folder_path):
        makedirs(folder_path)
    out_file_path = path_join(folder_path, 'RWresults' + extension)
    # One header row plus one row per point of the longest series.
    max_len = 0
    for i in range(len(data_points)):
        if len(data_points[i]) > max_len:
            max_len = len(data_points[i])
    max_len += 1
    rows = [""] * max_len
    # Header: each label spans two columns (x and y of its series).
    legend_labels_list[0] = legend_labels_list[0].split('#')
    rows[0] = (deliminator + deliminator).join(
        legend_labels_list[0]) + deliminator + deliminator
    with open(out_file_path, 'w', encoding='utf-8') as out_file:
        # Row j+1 accumulates "x,y," pairs from every series that has a
        # j-th point; shorter series simply contribute fewer columns.
        for i in range(len(data_points)):
            for j in range(1, len(data_points[i]) + 1):
                rows[j] = rows[j] + str(
                    data_points[i][j - 1][0]) + deliminator + str(
                    data_points[i][j - 1][1]) + deliminator
        for i in range(len(rows)):
            out_file.write(rows[i] + '\n')
    # NOTE(review): redundant — the with-statement above already closed
    # the file; close() on a closed file is a no-op.
    out_file.close()
    return out_file_path, extension
def download_rwa(self) -> str:
    """Download rolling window analysis result as CSV file.

    :return: The directory of the saved CSV file.
    """
    # Ensure the results folder for this session exists.
    result_folder_path = os.path.join(
        session_manager.session_folder(), RESULTS_FOLDER)
    if not os.path.isdir(result_folder_path):
        os.makedirs(result_folder_path)

    # Write the CSV frame next to the other session results.
    save_path = os.path.join(result_folder_path, "rolling_window.csv")
    csv_frame = self._get_rwa_csv_frame()
    csv_frame.to_csv(path_or_buf=save_path,
                     index_label="# Window",
                     na_rep="NA")
    return save_path
def download_rwa(self) -> str: """Download rolling window analysis result as CSV file. :return: The directory of the saved CSV file. """ # Get the default saving directory of rolling window result. result_folder_path = os.path.join(session_manager.session_folder(), RESULTS_FOLDER) # Attempt to make the directory. if not os.path.isdir(result_folder_path): os.makedirs(result_folder_path) # Get the complete saving path of rolling window result. save_path = os.path.join(result_folder_path, "rolling_window.csv") self._get_rwa_csv_frame().to_csv(path_or_buf=save_path, index_label="# Window", na_rep="NA") return save_path
def __init__(self, original_filename: str, file_name: str,
             file_string: str, file_id: int):
    """Class for an object to hold all info about a specific uploaded file.

    Each uploaded file will be stored in a unique object, and accessed
    through the FileManager files dictionary. A major data attribute of
    this class is a string that (sometimes) contains the text contents of
    the file (Most of the time).

    This newly constructed LexosFile object is created from the
    information passed in, and performs some preliminary processing.

    :param original_filename: the original file name of the uploaded file.
    :param file_name: the file name we store.
    :param file_string: contents of the file's text.
    :param file_id: the ID to assign to the new file.
    """
    self.doc_type = 'text'  # default doc type
    self.id = file_id
    self.original_source_filename = original_filename
    self.name = file_name
    self.contents_preview = self.generate_preview(file_string)
    # The contents are persisted under "<id>.txt" in the session's
    # contents folder; save_path must be set before save_contents runs.
    self.save_path = pathjoin(session_manager.session_folder(),
                              constants.FILE_CONTENTS_FOLDER,
                              str(self.id) + '.txt')
    self.save_contents(file_string)
    self.active = True
    self.class_label = ''
    # The label is the file name minus its final extension; the extension
    # itself drives the document-type detection.
    name_parts = self.name.split('.')
    self.label = '.'.join(name_parts[:-1])
    self.set_type_from(name_parts[-1], file_string)
    self.has_tags = self.check_for_tags(file_string)
    self.is_gutenberg = self.check_for_gutenberg(file_string)
    self.options = {}
def __init__(self, original_filename: str, file_name: str,
             file_string: str, file_id: int):
    """Class for an object to hold all info about a specific uploaded file.

    Each uploaded file will be stored in a unique object, and accessed
    through the FileManager files dictionary. A major data attribute of
    this class is a string that (sometimes) contains the text contents of
    the file (Most of the time).

    This newly constructed LexosFile object is created from the
    information passed in, and performs some preliminary processing.

    :param original_filename: the original file name of the uploaded file.
    :param file_name: the file name we store.
    :param file_string: contents of the file's text.
    :param file_id: the ID to assign to the new file.
    """
    self.doc_type = 'text'  # default doc type
    self.id = file_id
    self.original_source_filename = original_filename
    self.name = file_name
    self.contents_preview = self.generate_preview(file_string)
    # Contents persist under "<id>.txt" in the session contents folder;
    # save_path must be assigned before save_contents uses it.
    self.save_path = pathjoin(
        session_manager.session_folder(),
        constants.FILE_CONTENTS_FOLDER,
        str(self.id) + '.txt')
    self.save_contents(file_string)
    self.active = True
    self.class_label = ''
    # Label = file name minus final extension; the extension drives the
    # document-type detection below.
    split_name = self.name.split('.')
    self.label = '.'.join(split_name[:-1])
    self.set_type_from(split_name[-1], file_string)
    self.has_tags = self.check_for_tags(file_string)
    self.is_gutenberg = self.check_for_gutenberg(file_string)
    self.options = {}
def download_dtm(self) -> str:
    """Download the desired DTM as a CSV file.

    :return: The file path that saves the CSV file.
    """
    # Pick the orientation the user requested on the front end.
    if self._front_end_option.orientation == "file_col":
        required_dtm = self._get_file_col_dtm()
    else:
        required_dtm = self._get_file_row_dtm()

    # Make sure the session results folder exists.
    folder_path = os.path.join(session_folder(), RESULTS_FOLDER)
    if not os.path.isdir(folder_path):
        os.makedirs(folder_path)

    # Write the DTM and hand back its location.
    file_path = os.path.join(folder_path, "tokenizer_result.csv")
    required_dtm.to_csv(file_path)
    return file_path
def get_topword_csv_path(self, class_division_map: pd.DataFrame) -> str:
    """Write the generated top word results to an output CSV file.

    :param class_division_map: a pandas data frame where:
        - the data is the division map with boolean values that indicate
          which class each file belongs to.
        - the index is the class labels.
        - the column is the file id.
    :return: path of the generated CSV file.
    """
    # Make the path.
    result_folder_path = os.path.join(session_manager.session_folder(),
                                      RESULTS_FOLDER)

    # exist_ok=True replaces the original ``except OSError: pass``, which
    # also silently swallowed real failures such as permission errors.
    os.makedirs(result_folder_path, exist_ok=True)

    # Get the path to save file.
    save_path = os.path.join(result_folder_path, TOPWORD_CSV_FILE_NAME)

    # Get topword result.
    topword_result = \
        self._get_result(class_division_map=class_division_map)

    with open(save_path, 'w', encoding='utf-8') as f:
        # Write header to the file.
        f.write(topword_result.header + '\n')

        # Write results to the file.
        # Since we want indexes and data in rows, we get the transpose.
        for result in topword_result.results:
            f.write(pd.DataFrame(result).transpose().to_csv(header=True))

    return save_path
def generate_mc_json_obj(file_manager: FileManager):
    """Generates a JSON object for multicloud when working with a mallet
    .txt file.

    Args:
        file_manager: the session's FileManager (used when the analysis
            type is 'userfiles').

    Returns:
        An object, formatted in the JSON that d3 needs, either a list or
        a dictionary.
    """
    # Paths for the uploaded topic file and the converted d3-readable
    # output, both inside the current session's folders.
    content_path = os.path.join(session_manager.session_folder(),
                                constants.FILE_CONTENTS_FOLDER,
                                constants.MALLET_INPUT_FILE_NAME)
    output_path = os.path.join(session_manager.session_folder(),
                               constants.RESULTS_FOLDER,
                               constants.MALLET_OUTPUT_FILE_NAME)
    try:
        makedirs(
            path_join(session_manager.session_folder(),
                      constants.RESULTS_FOLDER))
        # attempt to make the result dir
    except FileExistsError:
        pass  # result dir already exists

    if request.form['analysistype'] == 'userfiles':
        json_obj = generate_json_for_d3(file_manager, merged_set=False)
    else:  # request.form['analysistype'] == 'topicfile'
        # Pull the original file name out of the FileStorage repr, e.g.
        # "<FileStorage: 'name.txt' ...>" — the first quoted substring.
        topic_string = str(request.files['optuploadname'])
        topic_string = re.search(r"'(.*?)'", topic_string)
        topic_string = topic_string.group(1)
        if topic_string != '':
            request.files['optuploadname'].save(content_path)
        with open(content_path, 'r', encoding='utf-8') as f:
            content = f.read()  # reads content from the upload file
        # Coerce to non UTF-8 files to UTF-8
        encoding = general_functions.get_encoding(content)
        if encoding != 'utf-8':
            # NOTE(review): ``content`` is a str here (text-mode read) and
            # str has no .decode() in Python 3 — this branch looks like a
            # Python 2 remnant and would raise AttributeError; confirm.
            content = content.decode(encoding).encode('utf-8')
        if content.startswith('#doc source pos typeindex type topic'):
            # begin converting a Mallet file into the file d3 can
            # understand
            tuples = []
            # Read the output_state file
            with open(content_path, encoding='utf-8') as f:
                # Skip the first three lines
                for _ in range(3):
                    next(f)
                # Create a list of type:topic combinations
                for line in f:
                    # Make sure the number of columns is correct
                    line = re.sub(r'\s+', ' ', line)
                    try:
                        doc, source, pos, type_index, doc_type, topic = \
                            line.rstrip().split(' ')
                        type_topic_combination = doc_type + ':' + topic
                        tuples.append(type_topic_combination)
                    except BaseException:
                        raise Exception(
                            "Your source data cannot be "
                            "parsed into a regular "
                            "number of columns. Please ensure that there are "
                            "no spaces in your file names or file paths. It; "
                            "may be easiest to open the outpt_state file in a "
                            "spreadsheet using a space as; the delimiter and "
                            "text as the field type. Data should only be "
                            "present in columns; A to F. Please fix any "
                            "misaligned data and run this script again.")
            # Count the number of times each type-topic combo appears
            from collections import defaultdict
            topic_count = defaultdict(int)
            for x in tuples:
                topic_count[x] += 1
            # Populate a topic_counts dict with type: topic:count
            words = []
            topic_counts = {}
            for k, v in topic_count.items():
                doc_type, topic = k.split(':')
                count = int(v)
                tc = topic + ":" + str(count)
                if doc_type in words:
                    topic_counts[doc_type] = \
                        topic_counts[doc_type] + " " + tc
                else:
                    topic_counts[doc_type] = tc
                words.append(doc_type)
            # Add a word ID
            out = ""
            i = 0
            for k, v in topic_counts.items():
                out += str(i) + " " + k + " " + v + "\n"
                i += 1
            # Write the output file
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(out)  # Python will convert \n to os.linesep
            # end converting a Mallet file into the file d3 can understand
        else:
            with open(output_path, 'w', encoding='utf-8') as f:
                # if this is the json form,
                # just write that in the output folder
                f.write(content)
        json_obj = multicloud_topic.topic_json_maker(output_path)
    return json_obj
def _file_manager_path(self) -> str:
    """Get the path of the file manager pickle file."""
    # The pickle lives directly inside the session folder.
    session_dir = session_folder()
    return os.path.join(session_dir, constants.FILEMANAGER_FILENAME)
def generate_k_means_voronoi(file_manager: FileManager):
    """Generates a table of cluster_number and file name from the active
    files.

    Args:
        file_manager: the session's FileManager holding the active files.

    Returns:
        kmeans_index: a list of index of the closest center of the file
        siltt_score: a float of silhouette score based on KMeans algorithm
        file_name_str: a string of file names, separated by '#'
        k_value: an int of the number of K from input
        (plus color_chart, final_points_list, final_centroids_list,
        text_data, max_x for the Voronoi rendering)
    """
    # Unpack the deprecated matrix options selected on the front end.
    ngram_size, use_word_tokens, use_freq, use_tfidf, norm_option, \
        grey_word, show_grey_word, only_char_grams_within_words, mfw, \
        culling = file_manager.get_matrix_options_deprec()
    # NOTE: use_tfidf/use_freq are deliberately overridden to False here,
    # ignoring the values unpacked above.
    count_matrix = file_manager.get_matrix_deprec(
        use_word_tokens=use_word_tokens,
        use_tfidf=False,
        norm_option=norm_option,
        only_char_grams_within_words=only_char_grams_within_words,
        n_gram_size=ngram_size,
        use_freq=False,
        grey_word=grey_word,
        show_grey_word=show_grey_word,
        mfw=mfw,
        cull=culling)
    # Strip the header row and the label column, leaving pure counts.
    del count_matrix[0]
    for row in count_matrix:
        del row[0]
    matrix = np.array(count_matrix)
    # Gets options from request.form and uses options to generate the
    # K-mean results.
    # NOTE(review): the default k is len(...)/2, a float in Python 3; it
    # is normally replaced by the int parsed from 'nclusters' below —
    # confirm the form always supplies it or that the KMeans helper
    # tolerates a float k.
    k_value = len(file_manager.get_active_files()) / 2  # default K value
    max_iter = 300  # default number of iterations
    init_method = request.form['init']
    n_init = 300
    tolerance = 1e-4
    if (request.form['nclusters'] != '') and \
            (int(request.form['nclusters']) != k_value):
        k_value = int(request.form['nclusters'])
    if (request.form['max_iter'] != '') and \
            (int(request.form['max_iter']) != max_iter):
        max_iter = int(request.form['max_iter'])
    if request.form['n_init'] != '':
        n_init = int(request.form['n_init'])
    if request.form['tolerance'] != '':
        tolerance = float(request.form['tolerance'])
    metric_dist = request.form['KMeans_metric']
    # Collect the (possibly user-renamed) label of every active file.
    file_name_list = []
    for l_file in list(file_manager.files.values()):
        if l_file.active:
            if request.form["file_" + str(l_file.id)] == l_file.label:
                file_name_list.append(l_file.label)
            else:
                new_label = request.form["file_" + str(l_file.id)]
                file_name_list.append(new_label)
    # Join the names with '#' so the front end can split them again.
    file_name_str = file_name_list[0]
    for i in range(1, len(file_name_list)):
        file_name_str += "#" + file_name_list[i]
    # Make sure the results folder exists before KMeans writes into it.
    folder_path = path_join(session_manager.session_folder(),
                            constants.RESULTS_FOLDER)
    if not os.path.isdir(folder_path):
        makedirs(folder_path)
    kmeans_index, siltt_score, color_chart, final_points_list, \
        final_centroids_list, text_data, max_x = \
        KMeans.get_k_means_voronoi(
            matrix,
            k_value,
            max_iter,
            init_method,
            n_init,
            tolerance,
            metric_dist,
            file_name_list)
    return kmeans_index, siltt_score, file_name_str, k_value, \
        color_chart, final_points_list, final_centroids_list, \
        text_data, max_x
def generate_csv(file_manager: FileManager) -> Tuple[str, str]:
    """Generates a CSV file from the active files.

    Args:
        file_manager: the session's FileManager holding the active files.

    Returns:
        The filepath where the CSV was saved, and the chosen extension
        (.csv or .tsv) for the file.
    """
    transpose = request.form['csvorientation'] == 'filerow'
    use_tsv = request.form['csvdelimiter'] == 'tab'
    extension = '.tsv' if use_tsv else '.csv'
    count_matrix = generate_csv_matrix(file_manager)
    delimiter = '\t' if use_tsv else ','

    # Quote every file name so embedded tabs/commas survive the delimiter.
    # (Hoisted: the original duplicated this identical line in both
    # branches of the orientation check.)
    count_matrix[0] = [
        '"' + file_name + '"' for file_name in count_matrix[0]
    ]
    if not transpose:
        count_matrix = list(zip(*count_matrix))  # transpose the matrix
        # escape all the comma and tab in the word, and makes the leading
        # item empty string.
        count_matrix[0] = [''] + ['"' + word + '"'
                                  for word in count_matrix[0][1:]]
        count_matrix = list(zip(*count_matrix))  # transpose back

    folder_path = path_join(session_manager.session_folder(),
                            constants.RESULTS_FOLDER)
    if not os.path.isdir(folder_path):
        makedirs(folder_path)
    out_file_path = path_join(folder_path, 'results' + extension)

    # Class labels go in an extra column (file-row orientation) or a
    # final row (file-column orientation).
    class_label_list = ["Class Label"]
    for l_file in list(file_manager.files.values()):
        if l_file.active:
            class_label_list.append(l_file.class_label)

    with open(out_file_path, 'w', encoding='utf-8') as out_file:
        for i, row in enumerate(count_matrix):
            row_str = delimiter.join([str(item) for item in row])
            if transpose:
                row_str += delimiter + class_label_list[i]
            out_file.write(row_str + '\n')
        if not transpose:
            out_file.write(delimiter.join(class_label_list) + '\n')
    # The redundant out_file.close() inside the with-block was removed;
    # the context manager already closes the file.

    return out_file_path, extension
def _file_manager_path(self) -> str:
    """Get the path of the file manager pickle file."""
    # The pickle lives directly inside the session folder.
    return os.path.join(session_folder(), constants.FILEMANAGER_FILENAME)