def clear_dir(dir_list):
    """.. Clear directory. Create directory if nonexistent.

    Empties every directory named in ``dir_list``: each one is removed
    through ``remove_dir`` (which is documented as handling symbolic links
    safely) and then recreated with ``os.makedirs``.
    Directories can be specified with the * shell pattern
    (see `here <https://www.gnu.org/software/findutils/manual/html_node/find_html/Shell-Pattern-Matching.html>`__).

    Note
    ----
    To clear a directory means to remove all contents of a directory.
    An entry that matches nothing on disk is kept verbatim and is therefore
    created as a new directory.

    NOTE(review): earlier documentation stated that a nonexistent directory
    is *not* created when it is specified via shell pattern, but an unmatched
    pattern is currently handed to ``os.makedirs`` unchanged. Confirm which
    behavior is intended.

    Parameters
    ----------
    dir_list : str, list
        Directory or list of directories to clear.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        Raised for any failure inside the function; the message carries the
        formatted traceback of the underlying error.

    Example
    -------
    The following code clears directories ``dir1`` and ``dir2``.

    .. code-block:: python

        clear_dir(['dir1', 'dir2'])

    The following code clears directories beginning with ``dir``.

    .. code-block:: python

        clear_dir(['dir*'])
    """

    try:
        requested = convert_to_list(dir_list, 'dir')

        # Expand shell patterns up front. Anything without a match is kept
        # as a literal path so that it still gets created below.
        targets = []
        for pattern in requested:
            matches = glob.glob(pattern)
            if matches:
                targets.extend(matches)
            else:
                targets.append(pattern)

        remove_dir(targets, quiet = True)

        # Recreate each directory empty and report it.
        for target in targets:
            os.makedirs(target)
            print(colored('Cleared: `%s`' % target, metadata.color_success))
    except:
        error_message = format_message(
            'Error with `clear_dir`. Traceback can be found below.'
        )
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def remove_dir(dir_list, quiet = False):
    """.. Remove directory using system command.

    Removes every directory matched by the entries of ``dir_list``.
    Each entry is normalized with ``norm_path`` and expanded with
    ``glob.glob``; entries that match nothing are silently skipped.
    Directories can be specified with the * shell pattern
    (see `here <https://www.gnu.org/software/findutils/manual/html_node/find_html/Shell-Pattern-Matching.html>`__).

    The removal itself is delegated to ``remove_path``; presumably that is
    what makes removing symbolic links safe -- TODO confirm.

    Parameters
    ----------
    dir_list : str, list
        Directory or list of directories to remove.
    quiet : bool, optional
        Suppress printing of directories removed. Defaults to ``False``.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        Raised for any failure inside the function, including the
        ``TypeError`` produced when a matched path is a regular file
        rather than a directory.

    Example
    -------
    The following code removes directories ``dir1`` and ``dir2``.

    .. code-block:: python

        remove_dir(['dir1', 'dir2'])

    The following code removes directories beginning with ``dir``.

    .. code-block:: python

        remove_dir(['dir*'])
    """

    try:
        patterns = [norm_path(entry) for entry in convert_to_list(dir_list, 'dir')]

        # Resolve every pattern before anything is deleted, so that removing
        # one match cannot change what a later pattern expands to.
        matches = []
        for pattern in patterns:
            matches.extend(glob.glob(pattern))

        for match in matches:
            if os.path.isdir(match):
                remove_path(match, quiet = quiet)
            elif os.path.isfile(match):
                # A regular file is a caller error, not something to delete.
                raise_from(TypeError(messages.type_error_not_dir % match), None)
    except:
        error_message = format_message(
            'Error with `remove_dir`. Traceback can be found below.'
        )
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def parse_file_list(self):
    """Parse wildcards in list of files.

    Converts ``self.file_list`` to a list, expands every entry with
    ``glob.glob`` and stores the combined matches back on
    ``self.file_list``.

    Returns
    -------
    None

    Raises
    ------
    CritError
        If none of the entries matches an existing path.
    """

    # Read the instance attribute: the method has no parameter or local
    # named ``file_list``, so the bare name raised ``NameError``.
    self.file_list = convert_to_list(self.file_list, 'file')

    file_list_parsed = [
        match
        for pattern in self.file_list
        for match in glob.glob(pattern)
    ]

    if not file_list_parsed:
        error_list = [str(f) for f in self.file_list]
        raise CritError(messages.crit_error_no_files % error_list)

    self.file_list = file_list_parsed
def tablefill(inputs, template, output, null='.'):
    r""".. Fill tables for template using inputs.

    Fills tables in document ``template`` using files in list ``inputs``.
    Writes filled document to file ``output`` (UTF-8 encoded).
    Null characters in ``inputs`` are replaced with value ``null``.

    Parameters
    ----------
    inputs : list
        Input or list of inputs to fill into template.
    template : str
        Path of template to fill.
    output : str
        Path of output.
    null : str
        Value to replace null characters (i.e., ``''``, ``'.'``, ``'NA'``).
        Defaults to ``'.'``.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        Raised for any failure inside the function, including the
        ``CritError`` produced when two inputs define the same table tag.

    Note
    ----
    The readme reproduced below is legacy documentation. It refers to the
    first argument as ``input`` and mentions error messages being returned
    as a string; this function names the argument ``inputs``, returns
    ``None`` and reports errors by raising.

    Example
    -------

    .. code-block::

        #################################################################
        #  tablefill_readme.txt - Help/Documentation for tablefill.py
        #################################################################

        Description:
        tablefill.py is a Python module designed to fill LyX/Tex tables with
        output from text files (usually output from Stata or Matlab).

        Usage:
        Tablefill takes as input a LyX (or Tex) file containing empty tables
        (the template file) and text files containing data to be copied to
        these tables (the input files), and produces a LyX (or Tex) file with
        filled tables (the output file). For brevity, LyX will be used to
        denote LyX or Tex files throughout.

        Tablefill must first be imported to make.py. This is typically
        achieved by including the following lines:

        ```
        from gslab_fill.tablefill import tablefill
        ```

        Once the module has been imported, the syntax used to call tablefill
        is as follows:

        ```
        tablefill(input = 'input_file(s)', template = 'template_file',
                  output = 'output_file')
        ```

        The argument 'template' is the user written LyX file which contains
        the tables to be filled in. The argument 'input' is a list of the
        text files containing the output to be copied to the LyX tables. If
        there are multiple input text files, they are listed as:
        input = 'input_file_1 input_file_2'. The argument 'output' is the
        name of the filled LyX file to be produced. Note that this file is
        created by tablefill.py and should not be edited manually by the user.

        ###########################
        Input File Format:
        ###########################

        The data needs to be tab-delimited rows of numbers (or characters),
        preceded by `<label>`. The < and > are mandatory. The numbers can be
        arbitrarily long, can be negative, and can also be in scientific
        notation.

        Examples:
        ----------

        ```
        <tab:Test>
        1   2   3
        2   3   1
        3   1   2
        ```

        ```
        <tab:FunnyMat>
        1   2   3   23  2
        2   3
        3   1   2   2
        1
        ```
        (The rows do not need to be of equal length.)

        Completely blank (no tab) lines are ignored.
        If a "cell" is merely "." or "[space]", then it is treated as
        completely missing. That is, in the program:

        ```
        <tab:Test>
        1   2   3
        2   .   1   3
        3       1   2
        ```

        is equivalent to:

        ```
        <tab:Test>
        1   2   3
        2   1   3
        3   1   2
        ```

        This feature is useful as Stata outputs missing values in numerical
        variables as ".", and missing values in string variables as "[space]".

        ................................
        Scientific Notation Notes:
        ................................

        The scientific notation has to be of the form:
        [numbers].[numbers]e(+/-)[numbers]

        Examples:

        ```
        23.2389e+23
        -2.23e-2
        -0.922e+3
        ```

        ###########################
        Template LyX Format:
        ###########################

        The LyX template file determines where the numbers from the input
        files are placed.

        Every table in the template file (if it is to be filled) must appear
        within a float. There must be one, and only one, table object inside
        the float, and the table name must include a label object that
        corresponds to the label of the required table in the input file.

        Note that table names cannot be duplicated. For a single template
        file, each table to be filled must have a unique label, and there
        must be one, and only one, table with that same label in the text
        files used as input. Having multiple tables with the same name in the
        input files or in the template file will cause errors.

        Note also that labels are NOT case-sensitive. That is, <TAB:Table1>
        is considered the same as `<tab:table1>`.

        In the LyX tables, "cells" to be filled with entries from the input
        text files are indicated by the following tags:

        `"###"  (no quotes)`
        or
        `"#[number][,]#"  (no quotes)`

        The first case will result in a literal substitution. I.e. whatever
        is in the text tables for that cell will be copied over. The second
        case will convert the data table's number (if in scientific notation)
        and will truncate this converted number to [number] decimal places.
        It will automatically round while doing so.

        If a comma appears after the number (within #[number]#), then it will
        add commas to the digits to the left of the decimal place.

        Examples:
        ---------

        ```
        2309.2093 + ### = 2309.2093
        2309.2093 + #4# = 2309.2093
        2309.2093 + #5# = 2309.20930
        2309.2093 + #20# = 2309.20930000000000000000
        2309.2093 + #3# = 2309.209
        2309.2093 + #2# = 2309.21
        2309.2093 + #0# = 2309
        2309.2093 + #0,# = 2,309
        ```

        ```
        -2.23e-2  + #2# = -0.0223 + #2# = -0.02
        -2.23e-2  + #7# = -0.0223 + #7# = -0.0223000
        -2.23e+10 + #7,# = -22300000000 + #7,# = -22,300,000,000.000000
        ```

        Furthermore, only ###/#num# will be replaced, allowing you to put
        things around ###/#num# to alter the final output:

        Examples:
        --------

        ```
        2309.2093 + (#2#) = (2309.21)
        2309.2093 + #2#** = 2309.21**
        2309.2093 + ab#2#cd = ab2309.21cd
        ```

        If you are doing exact substitution, then you can use characters:

        Examples:
        ---------

        `abc + ### = abc`

        ................................
        Intentionally blank cells:
        ................................

        If you would like to display a blank cell, you can use "---":

        Examples:
        ---------

        ```
        --- + ### = ---
        --- + #3# = ---
        ```

        ######################
        # Example Combinations
        #   Of input + template
        ######################

        Example 1 (Simple)
        ----------

        ```
        Input: <tab:Test>
        1   2   3
        2   1   3
        3   1   2

        Template: `<tab:Test> ` (pretend this is what you see in LyX)
        ### ### ###
        ### ### ###
        ### ### ###

        Result:<tab:Test>
        1   2   3
        2   1   3
        3   1   2
        ```

        Example 2 (More Complicated)
        ----------

        ```
        Input: <tab:Test>
        1   .   3
        2e-5    1   3.023
        .   -1  2   3

        Template: <tab:Test>  (pretend this is what you see in LyX)
        (###)   2       ###
        #3#     ###     #1#
        NA      ###     ###     ###

        Result:<tab:Test>
        (1)     2       3
        0.000   1       3.0
        NA      -1      2       3
        ```

        ===================
        ====Important======
        ===================

        By design, missings in input table and "missings" in template do not
        have to line up.

        Example 3 (LyX)
        ----------

        ```
        Input: <tab:Test>
        1   .   3
        2e-5    .   3.023
        .   -1  2

        Template: <tab:Test>
        ### ### abc
        abc #2# #3#
        NA  ### ###

        Result:<tab:Test>
        1   3   abc
        abc 0.00    3.023
        NA  -1  2

        Recall that to the program, the above input table is no different
        from:
        1   3
        2e-5    3.023
        -1  2
        ```

        It doesn't "know" where the numbers should be placed within a row,
        only what the next number to place should be.

        Similarly:

        Example 4 (LyX)
        ----------

        ```
        Input: <tab:Test>
        1   1   2
        1   1   3
        2   -1  2

        Template: <tab:Test>
        ### ### ###
        abc abc abc
        ### #2# #3#
        ### ### ###

        Result:<tab:Test>
        1   1   2
        abc abc abc
        1   1.00    3.000
        2   -1  2
        ```

        If a row in the template has no substitutions, then it's not really a
        row from the program's point of view.

        ######################
        # Error Logging
        ######################

        If an error occurs during the call to tablefill, it will be displayed
        in the command window. When make.py finishes, the user will be able
        to scroll up through the output and examine any error messages. Error
        messages, which include a description of the error type and a
        traceback to the line of code where the error occurred, can also be
        returned as a string object using the following syntax:

        exitmessage = tablefill( input = 'input_file(s)',
                                 template = 'template_file',
                                 output = 'output_file' )

        Lines can then be added to make.py to output this string to a log
        file using standard Python and built in gslab_make commands.

        ######################
        # Common Errors
        ######################

        Common mistakes which can lead to errors include:

        - Mismatch between the length of the LyX table and the corresponding
          text table. If the LyX table has more entries to be filled than the
          text table has entries to fill from, this will cause an error and
          the table will not be filled.

        - Use of numerical tags (e.g. #1#) to fill non-numerical data. This
          will cause an error. Non-numerical data can only be filled using
          "###", as it does not make sense to round or truncate this data.

        - Multiple table objects in the same float. Each table float in the
          template LyX file can only contain one table object. If a float
          contains a second table object, this table will not be filled.

        ######################
        # Boldfacing entries
        ######################

        It is straightforward to develop functions that conditionally write
        entries of tables in boldface; functions may do so by inserting
        '\series bold' in the lines of the filled LyX file immediately before
        phrases that the user wishes to make bold.
    """

    try:
        input_paths = [norm_path(path) for path in convert_to_list(inputs, 'file')]
        parsed = [_parse_content(path, null) for path in input_paths]

        # Index tables by tag. A later duplicate overwrites an earlier entry,
        # which is detected below by comparing sizes.
        tables = {}
        for tag, data in parsed:
            tables[tag] = data

        if len(tables) != len(parsed):
            raise_from(CritError(messages.crit_error_duplicate_tables), None)

        filled = _insert_tables(template, tables, null)

        with io.open(output, 'w', encoding='utf-8') as out_file:
            out_file.write(filled)
    except:
        error_message = format_message(
            'Error with `tablefill`. Traceback can be found below.'
        )
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def log_files_in_output(paths, depth = float('inf')):
    """.. Log files in output directory.

    Logs the following information for all files contained in directory
    ``output_dir``.

    - File name (in file ``output_statslog``)
    - Last modified (in file ``output_statslog``)
    - File size (in file ``output_statslog``)
    - File head (in file ``output_headslog``, optional)

    When walking through directory ``output_dir``, float ``depth`` determines
    level of depth to walk. Status messages are appended to file ``makelog``.

    Include additional output directories to walk through (typically
    directories that you wish to keep local) in directory list
    ``output_local_dir``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    depth : float, optional
        Level of depth when walking through output directory.
        Defaults to infinite.

    Path Keys
    ---------
    output_dir : str
        Path of output directory.
    output_local_dir : str, list, optional
        Path or list of paths of local output directories.
        Defaults to ``[]`` (i.e., none).
    output_statslog : str
        Path to write output statistics log.
    output_headslog : str, optional
        Path to write output headers log.
    makelog : str
        Path of makelog.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        Raised for any failure inside the function. The error message and
        traceback are written to the makelog before the error is raised.

    Example
    -------
    The following code will log information for all files contained in
    only the first level of ``paths['output_dir']``. Therefore, files
    contained in subdirectories will be ignored.

    .. code-block:: python

        log_files_in_output(paths, depth = 1)

    The following code will log information for any file in
    ``paths['output_dir']``, regardless of level of subdirectory.

    .. code-block:: python

        log_files_in_output(paths, depth = float('inf'))
    """

    try:
        # Resolve all path keys first; the two optional keys do not raise
        # when they are missing.
        main_dir = get_path(paths, 'output_dir')
        local_dirs = get_path(paths, 'output_local_dir', throw_error = False)
        stats_log = get_path(paths, 'output_statslog')
        heads_log = get_path(paths, 'output_headslog', throw_error = False)

        local_dirs = convert_to_list(local_dirs, 'dir') if local_dirs else []

        main_files = glob_recursive(main_dir, depth)
        local_files = []
        for local_dir in local_dirs:
            local_files.extend(glob_recursive(local_dir, depth))

        # A set, so a file reachable from several directories is logged once.
        all_files = set(main_files + local_files)

        if stats_log:
            _write_stats_log(norm_path(stats_log), all_files)

        if heads_log:
            _write_heads_log(norm_path(heads_log), all_files)

        message = 'Output logs successfully written!'
        write_to_makelog(paths, message)
        print(colored(message, metadata.color_success))
    except:
        error_message = format_message(
            'Error with `log_files_in_output`. Traceback can be found below.'
        )
        trace = traceback.format_exc()
        write_to_makelog(paths, error_message + '\n\n' + trace)
        raise_from(ColoredError(error_message, trace), None)