def run_module(root, module, build_script='make.py', osname=None):
    """.. Run module.

    Runs script `build_script` in module directory `module` relative to root
    of repository `root`. The working directory is changed to the module
    directory before the script is run and is not changed back afterwards.

    Parameters
    ----------
    root : str
        Directory of root.
    module : str
        Name of module.
    build_script : str
        Name of build script. Defaults to ``make.py``.
    osname : str, optional
        Name of OS. Used to determine syntax of system command.
        Defaults to ``os.name``.

    Returns
    -------
    None

    Raises
    ------
    SystemExit
        With exit status 1 if the build script returns a non-zero status.
    ColoredError
        If any other error occurs (e.g., the build script does not exist).

    Example
    -------
    The following code runs the script ``root/module/make.py``.

    .. code-block:: python

        run_module(root = 'root', module = 'module')
    """

    osname = osname if osname else os.name  # https://github.com/sphinx-doc/sphinx/issues/759

    try:
        module_dir = os.path.join(root, module)
        os.chdir(module_dir)

        # Resolved after the chdir, so a relative name refers to the module directory.
        build_script = norm_path(build_script)
        if not os.path.isfile(build_script):
            raise CritError(messages.crit_error_no_file % build_script)

        message = 'Running module `%s`' % module
        message = format_message(message)
        message = colored(message, attrs=['bold'])
        print('\n' + message)

        status = os.system(
            '%s %s' % (metadata.default_executables[osname]['python'], build_script))
        if status != 0:
            raise ProgramError()
    except ProgramError:
        # A bare ``sys.exit()`` exits with status 0, which reports a failed
        # build as a success to the calling shell; exit non-zero instead.
        sys.exit(1)
    except:
        error_message = 'Error with `run_module`. Traceback can be found below.'
        error_message = format_message(error_message)
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def zip_dir(source_dir, zip_dest):
    """.. Zip directory to file.

    Zips the files found under directory ``source_dir`` into file ``zip_dest``.
    Every file is stored in the archive under its base name only, so the
    directory layout below ``source_dir`` is not kept inside the archive.

    Parameters
    ----------
    source_dir : str
        Path of directory to zip.
    zip_dest : str
        Destination of zip file.

    Returns
    -------
    None
    """

    try:
        archive_path = '%s' % (zip_dest)
        with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as archive:
            source_dir = norm_path(source_dir)
            for current_dir, _subdirs, names in os.walk(source_dir):
                for name in names:
                    full_path = os.path.join(current_dir, name)
                    # Archive member name: base name of the file on disk.
                    member = os.path.basename(full_path)
                    archive.write(full_path, member)
                    print(colored('Zipped: `%s` as `%s`' % (full_path, member),
                                  metadata.color_success))
    except:
        error_message = format_message(
            'Error with `zip_dir`. Traceback can be found below.')
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def get_modified_sources(paths, source_map, depth=float('inf')):
    """.. Get source files considered changed by git.

    Checks the modification status for all sources contained in list
    ``source_map`` (returned by `sourcing functions`_). Produces warning if
    sources have been modified according to git. When walking through sources,
    float ``depth`` determines level of depth to walk. Warning messages are
    appended to file ``makelog``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    source_map : list
        Mapping of sources (returned from `sourcing functions`_).
    depth : float, optional
        Level of depth when walking through source directories.
        Defaults to infinite.

    Path Keys
    ---------
    makelog : str
        Path of makelog.

    Returns
    -------
    overlap : list
        Sorted list of source files considered changed by git. The list is
        complete even when the warning message itself is truncated.

    Notes
    -----
    Only the first 100 modified files are listed in the warning message.
    """

    try:
        # Collect every file reachable from the sources, without duplicates.
        source_files = set()
        for source, _destination in source_map:
            source_files.update(glob_recursive(source, depth))

        try:
            repo = git.Repo('.', search_parent_directories=True)
        except:
            raise_from(CritError(messages.crit_error_no_repo), None)
        modified = _get_git_status(repo)

        # Sorted so that the warning (and its truncation) is deterministic.
        overlap = sorted(f for f in source_files if f in modified)

        if overlap:
            # Truncate only the copy used for the message; the returned list
            # must contain file paths and nothing else.
            listed = overlap
            if len(listed) > 100:
                listed = listed[0:100] + ["and more (file list truncated due to length)"]
            message = messages.warning_modified_files % '\n'.join(listed)
            write_to_makelog(paths, message)
            print(colored(message, metadata.color_failure))

        return overlap
    except:
        error_message = 'Error with `get_modified_sources`. Traceback can be found below.'
        error_message = format_message(error_message)
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def _check_stata_output(output): """.. Check Stata output""" regex = "end of do-file[\s]*r\([0-9]*\);" if re.search(regex, output): error_message = 'Stata program executed with errors.' error_message = format_message(error_message) raise_from(ProgramError(error_message, 'See makelog for more detail.'), None)
def clear_dir(dir_list):
    """.. Clear directory. Create directory if nonexistent.

    Clears all directories in list ``dir_list`` using system command.
    Directories can be specified with the * shell pattern
    (see `here <https://www.gnu.org/software/findutils/manual/html_node/find_html/Shell-Pattern-Matching.html>`__).

    Note
    ----
    To clear a directory means to remove all contents of a directory.
    If the directory is nonexistent, the directory is created,
    unless the directory is specified via shell pattern.

    Parameters
    ----------
    dir_list : str, list
        Directory or list of directories to clear.

    Returns
    -------
    None

    Example
    -------
    The following code clears directories ``dir1`` and ``dir2``.

    .. code-block:: python

        clear_dir(['dir1', 'dir2'])

    The following code clears directories beginning with ``dir``.

    .. code-block:: python

        clear_dir(['dir*'])
    """

    try:
        dir_list = convert_to_list(dir_list, 'dir')

        dir_glob = []
        for dir_path in dir_list:
            expand = glob.glob(dir_path)
            if not expand:
                if '*' in dir_path:
                    # Unmatched shell pattern: there is nothing to clear and,
                    # per the note above, nothing to create either.
                    continue
                expand = [dir_path]
            for match in expand:
                # Skip repeats so the same directory is not created twice.
                if match not in dir_glob:
                    dir_glob.append(match)

        remove_dir(dir_glob, quiet=True)

        for dir_path in dir_glob:
            os.makedirs(dir_path)
            message = 'Cleared: `%s`' % dir_path
            print(colored(message, metadata.color_success))
    except:
        error_message = 'Error with `clear_dir`. Traceback can be found below.'
        error_message = format_message(error_message)
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def check_conda_status(root):
    """.. Makes sure that the repository is being run with conda and is up to date.

    Checks that conda is activated. Produces warning if it is not.
    Produces warning if ``setup/conda_env.yaml`` has been altered more recently
    than the recorded snapshot of the conda environment (file ``.conda_info``
    in ``root``, written from ``conda list --export``).

    Parameters
    ----------
    root : str
        Directory of root.

    Returns
    -------
    None

    Notes
    -----
    If ``.conda_info`` does not exist yet, it is created and no comparison
    against ``setup/conda_env.yaml`` is made.
    """

    python_executable = sys.executable

    # Check if currently in a conda env
    if 'conda' in python_executable:
        try:
            conda_info = os.path.join(root, '.conda_info')
            conda_info_new = os.path.join(root, '.conda_info_new')
            if os.path.exists(conda_info):
                # Paths are quoted so that a root containing spaces still works.
                os.system('conda list --export > "%s"' % conda_info_new)
                if filecmp.cmp(conda_info, conda_info_new):
                    # Environment unchanged: keep the old snapshot and its timestamp.
                    os.remove(conda_info_new)
                else:
                    # Environment changed: the new snapshot replaces the old one.
                    # Remove first so the rename also succeeds on Windows.
                    os.remove(conda_info)
                    os.rename(conda_info_new, conda_info)

                # Modification times are compared directly as timestamps.
                info_time = os.path.getmtime(conda_info)
                conda_yaml = os.path.join(root, 'setup', 'conda_env.yaml')
                yaml_time = os.path.getmtime(conda_yaml)

                if yaml_time > info_time:
                    print(colored(messages.warning_old_conda, 'red'))
            else:
                os.system('conda list --export > "%s"' % conda_info)
        except:
            error_message = 'Error with `check_conda_status`. Traceback can be found below.'
            error_message = format_message(error_message)
            raise_from(ColoredError(error_message, traceback.format_exc()), None)
    else:
        print(colored(messages.warning_not_conda, 'red'))
def remove_path(path, option = '', quiet = False):
    """.. Remove path using system command.

    Removes path ``path`` by running the ``rmdir`` command template from
    ``metadata.commands`` for the current OS through the shell. The path can be
    specified with the * shell pattern
    (see `here <https://www.gnu.org/software/findutils/manual/html_node/find_html/Shell-Pattern-Matching.html>`__).

    Parameters
    ----------
    path : str
        Path to remove.
    option : str, optional
        Options for system command. Defaults to the ``rmdir`` entry of
        ``metadata.default_options`` for the current OS.
    quiet : bool, optional
        Suppress printing of path removed. Defaults to ``False``.

    Returns
    -------
    None

    Example
    -------
    The following code removes path ``path``.

    .. code-block:: python

        remove_path('path')

    The following code removes all paths beginning with ``path``.

    .. code-block:: python

        remove_path('path*')
    """

    try:
        path = norm_path(path)
        # Fall back to the OS-specific default options when none are given.
        option = option or metadata.default_options[os.name]['rmdir']

        template = metadata.commands[os.name]['rmdir']
        command = template % (option, path)
        subprocess_fix.Popen(command, shell = True).wait()
        # ACTION ITEM: ADD DEBUGGING TO SUBPROCESS CALL

        if quiet:
            return
        print(colored('Removed: `%s`' % path, metadata.color_success))
    except:
        error_message = format_message(
            'Error with `remove_path`. Traceback can be found below.')
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def end_makelog(paths):
    """.. End make log.

    Appends a closing section to file ``makelog``, recording the end time and
    the current working directory. Nothing is done if no ``makelog`` path is set.

    Note
    ----
    We allow for writing to a make log even after the make log has ended.
    We do not recommend this for best practice.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.

    Path Keys
    ---------
    makelog : str
        Path of makelog.

    Returns
    -------
    None
    """

    try:
        makelog = get_path(paths, 'makelog')
        if not makelog:
            return

        makelog = norm_path(makelog)
        print(colored('Ending makelog file at: `%s`' % makelog, metadata.color_success))

        # The log must have been started in this session and exist on disk.
        if not metadata.makelog_started or not os.path.isfile(makelog):
            raise_from(CritError(messages.crit_error_no_makelog % makelog), None)

        with open(makelog, 'a', encoding='utf8') as log:
            ended_at = str(datetime.datetime.now().replace(microsecond=0))
            cwd = os.getcwd()
            print(messages.note_dash_line, file=log)
            print(messages.note_makelog_end + ended_at, file=log)
            print(messages.note_working_directory + cwd, file=log)
            print(messages.note_dash_line, file=log)
    except:
        error_message = format_message(
            'Error with `end_makelog`. Traceback can be found below.')
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def remove_dir(dir_list, quiet = False):
    """.. Remove directory using system command.

    Removes the directories in list ``dir_list`` by calling ``remove_path`` on
    each of them. Directories can be specified with the * shell pattern
    (see `here <https://www.gnu.org/software/findutils/manual/html_node/find_html/Shell-Pattern-Matching.html>`__).
    Entries that match nothing on disk are skipped; an entry that resolves to
    a file results in an error.

    Parameters
    ----------
    dir_list : str, list
        Directory or list of directories to remove.
    quiet : bool, optional
        Suppress printing of directories removed. Defaults to ``False``.

    Returns
    -------
    None

    Example
    -------
    The following code removes directories ``dir1`` and ``dir2``.

    .. code-block:: python

        remove_dir(['dir1', 'dir2'])

    The following code removes directories beginning with ``dir``.

    .. code-block:: python

        remove_dir(['dir1*'])
    """

    try:
        patterns = []
        for entry in convert_to_list(dir_list, 'dir'):
            patterns.append(norm_path(entry))

        # Expand every pattern before anything is removed.
        matches = []
        for pattern in patterns:
            matches.extend(glob.glob(pattern))

        for match in matches:
            if os.path.isdir(match):
                remove_path(match, quiet = quiet)
                continue
            if os.path.isfile(match):
                raise_from(TypeError(messages.type_error_not_dir % match), None)
    except:
        error_message = format_message(
            'Error with `remove_dir`. Traceback can be found below.')
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def start_makelog(paths):
    """.. Start make log.

    Writes file ``makelog`` (overwriting any existing file), recording the
    start time and the current working directory. Sets the make log status
    ``metadata.makelog_started`` to ``True``.

    Note
    ----
    The make log start condition is used by other functions to confirm a
    make log exists. The status is set even when no ``makelog`` path is given.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.

    Path Keys
    ---------
    makelog : str
        Path of makelog.

    Returns
    -------
    None
    """

    try:
        makelog = get_path(paths, 'makelog')
        metadata.makelog_started = True

        if not makelog:
            return

        makelog = norm_path(makelog)
        print(colored('Starting makelog file at: `%s`' % makelog, metadata.color_success))

        with open(makelog, 'w', encoding='utf8') as log:
            started_at = str(datetime.datetime.now().replace(microsecond=0))
            cwd = os.getcwd()
            print(messages.note_dash_line, file=log)
            print(messages.note_makelog_start + started_at, file=log)
            print(messages.note_working_directory + cwd, file=log)
            print(messages.note_dash_line, file=log)
    except:
        error_message = format_message(
            'Error with `start_makelog`. Traceback can be found below.')
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def update_internal_paths(paths):
    """.. Update within-directory paths using default configuration file.

    Returns dictionary ``paths`` with directory locations listed in file
    ``config``. Entries under ``make_paths: root_relative`` are joined onto
    ``root``; entries under ``make_paths: absolute`` are used as given.
    The dictionary is updated in place and also returned.

    Parameters
    ----------
    paths : dict
        Dictionary of paths to update.
        Dictionary should ex-ante contain values for all keys listed below.

    Path Keys
    ---------
    root : str
        Path of project repo root.
    config : str
        Path of default configuration file.

    Returns
    -------
    paths : dict
        Dictionary of paths.
    """

    try:
        config_default = get_path(paths, 'config')
        config_default = open_yaml(config_default)

        root = get_path(paths, 'root')
        make_paths = config_default['make_paths']

        # A section left empty in the configuration is treated as "no entries".
        root_relative = make_paths['root_relative'] or {}
        relative_paths = {path_label: os.path.join(root, path)
                          for path_label, path in root_relative.items()}
        absolute_paths = make_paths['absolute']

        paths.update(relative_paths)
        if absolute_paths:
            paths.update(absolute_paths)

        return paths
    except:
        error_message = 'Error with `update_internal_paths`. Traceback can be found below.'
        error_message = format_message(error_message)
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def update_executables(paths, osname=None):
    """.. Update executable names using user configuration file.

    Updates ``metadata.default_executables`` for the given OS with the
    executables listed under ``local: executables`` in file ``config_user``.
    Nothing is updated when that section is empty.

    Note
    ----
    Executable names are used by :ref:`program functions <program functions>`.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    osname : str, optional
        Name of OS. Defaults to ``os.name``.

    Path Keys
    ---------
    config_user : str
        Path of user configuration file.

    Returns
    -------
    None
    """

    if not osname:
        osname = os.name  # https://github.com/sphinx-doc/sphinx/issues/759

    try:
        user_config = open_yaml(get_path(paths, 'config_user'))
        _check_os(osname)

        user_executables = user_config['local']['executables']
        if user_executables:
            metadata.default_executables[osname].update(user_executables)
    except:
        error_message = format_message(
            'Error with update_executables. Traceback can be found below.')
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def update_paths(paths):
    """.. Update paths using user configuration file.

    Updates dictionary ``paths`` with the entries listed under ``external`` in
    file ``config_user``. The dictionary is updated in place and also returned.

    Note
    ----
    The ``paths`` argument for :ref:`sourcing functions<sourcing functions>`
    is used not only to get default paths for writing/logging, but also to
    `string format <https://docs.python.org/3.4/library/string.html#format-string-syntax>`__
    sourcing instructions.

    Parameters
    ----------
    paths : dict
        Dictionary of paths to update.
        Dictionary should ex-ante contain values for all keys listed below.

    Path Keys
    ---------
    config_user : str
        Path of user configuration file.

    Returns
    -------
    paths : dict
        Dictionary of updated paths.
    """

    try:
        user_config = open_yaml(get_path(paths, 'config_user'))

        externals = user_config['external']
        if externals:
            paths.update(externals)

        return paths
    except:
        error_message = format_message(
            'Error with update_paths. Traceback can be found below.')
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def unzip(zip_path, output_dir):
    """.. Unzip file to directory.

    Unzips file ``zip_path`` to directory ``output_dir``.

    Parameters
    ----------
    zip_path : str
        Path of file to unzip.
    output_dir : str
        Directory to write outputs of unzipped file.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        If the archive cannot be opened or extracted.
    """

    try:
        with zipfile.ZipFile(zip_path, allowZip64=True) as z:
            z.extractall(output_dir)
    except:
        # The message names this function (it previously named the parameter).
        error_message = 'Error with `unzip`. Traceback can be found below.'
        error_message = format_message(error_message)
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def write_source_logs(paths, source_map, depth=float('inf')):
    """.. Write source logs.

    Logs the following information for sources contained in list
    ``source_map`` (returned by :ref:`sourcing functions<sourcing functions>`).

    - Mapping of symlinks/copies to sources (in file ``source_maplog``)
    - Details on files contained in sources:

        - File name (in file ``source_statslog``)
        - Last modified (in file ``source_statslog``)
        - File size (in file ``source_statslog``)
        - File head (in file ``source_headslog``, optional)

    When walking through sources, float ``depth`` determines level of depth
    to walk. Status messages are appended to file ``makelog``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    source_map : list
        Mapping of symlinks/copies (destination) to sources
        (returned by :ref:`sourcing functions<sourcing functions>`).
    depth : float, optional
        Level of depth when walking through source directories.
        Defaults to infinite.

    Path Keys
    ---------
    source_statslog : str
        Path to write source statistics log.
    source_headslog : str, optional
        Path to write source headers log.
    source_maplog : str
        Path to write source map log.
    raw_dir : str, optional
        Directory whose files are logged in addition to the sources.
    makelog : str
        Path of makelog.

    Returns
    -------
    None

    Example
    -------
    The following code will log information for all files listed in
    ``source_map``. Therefore, files contained in directories listed in
    ``source_map`` will be ignored.

    .. code-block:: python

        write_source_logs(paths, source_map, depth = 1)

    The following code will log information for all files listed in
    ``source_map`` and any file in all directories listed in ``source_map``,
    regardless of level of subdirectory.

    .. code-block :: python

        write_source_logs(paths, source_map, depth = float('inf'))
    """

    try:
        source_statslog = get_path(paths, 'source_statslog')
        source_headslog = get_path(paths, 'source_headslog', throw_error=False)
        source_maplog = get_path(paths, 'source_maplog')

        # Collect every file reachable from the sources, without duplicates.
        source_files = set()
        for source, _destination in source_map:
            source_files.update(glob_recursive(source, depth))

        # ACTION: DECIDE WHETHER TO ALLOW FOR RAW DIRECTORY
        raw_dir = get_path(paths, 'raw_dir', throw_error=False)
        if raw_dir:
            # ``source_files`` is a set, so the raw files are merged with
            # ``update``; adding a list to a set raises ``TypeError``.
            source_files.update(glob_recursive(raw_dir))

        if source_statslog:
            source_statslog = norm_path(source_statslog)
            _write_stats_log(source_statslog, source_files)

        if source_headslog:
            source_headslog = norm_path(source_headslog)
            _write_heads_log(source_headslog, source_files)

        if source_maplog:
            source_maplog = norm_path(source_maplog)
            _write_source_maplog(source_maplog, source_map)

        message = 'Source logs successfully written!'
        write_to_makelog(paths, message)
        print(colored(message, metadata.color_success))
    except:
        error_message = 'Error with `write_source_logs`. Traceback can be found below.'
        error_message = format_message(error_message)
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def tablefill(inputs, template, output, null='.'):
    r""".. Fill tables for template using inputs.

    Fills tables in document ``template`` using files in list ``inputs``.
    Writes filled document to file ``output``.
    Null characters in ``inputs`` are replaced with value ``null``.

    Parameters
    ----------
    inputs : list
        Input or list of inputs to fill into template.
    template : str
        Path of template to fill.
    output : str
        Path of output.
    null : str
        Value to replace null characters (i.e., ``''``, ``'.'``, ``'NA'``).
        Defaults to ``'.'``.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        If filling fails for any reason, including two inputs defining a table
        with the same tag. The error carries the formatted traceback.

    Notes
    -----
    The readme reproduced below uses the keyword ``input`` and describes an
    exit message being returned. For this function the parameter is named
    ``inputs``, nothing is returned, and failures are raised as
    ``ColoredError``.

    Example
    -------
    .. code-block::

        #################################################################
        #  tablefill_readme.txt - Help/Documentation for tablefill.py
        #################################################################

        Description:
        tablefill.py is a Python module designed to fill LyX/Tex tables with
        output from text files (usually output from Stata or Matlab).

        Usage:
        Tablefill takes as input a LyX (or Tex) file containing empty tables
        (the template file) and text files containing data to be copied to
        these tables (the input files), and produces a LyX (or Tex) file with
        filled tables (the output file). For brevity, LyX will be used to
        denote LyX or Tex files throughout.

        Tablefill must first be imported to make.py. This is typically
        achieved by including the following lines:

        ```
        from gslab_fill.tablefill import tablefill
        ```

        Once the module has been imported, the syntax used to call tablefill
        is as follows:

        ```
        tablefill(input = 'input_file(s)', template = 'template_file',
                  output = 'output_file')
        ```

        The argument 'template' is the user written LyX file which contains
        the tables to be filled in. The argument 'input' is a list of the text
        files containing the output to be copied to the LyX tables. If there
        are multiple input text files, they are listed as:
        input = 'input_file_1 input_file_2'. The argument 'output' is the name
        of the filled LyX file to be produced. Note that this file is created
        by tablefill.py and should not be edited manually by the user.

        ###########################
        Input File Format:
        ###########################

        The data needs to be tab-delimited rows of numbers (or characters),
        preceded by `<label>`. The < and > are mandatory. The numbers can be
        arbitrarily long, can be negative, and can also be in scientific
        notation.

        Examples:
        ----------

        ```
        <tab:Test>
        1   2   3
        2   3   1
        3   1   2
        ```

        ```
        <tab:FunnyMat>
        1   2   3   23  2
        2   3
        3   1   2   2
        1
        ```
        (The rows do not need to be of equal length.)

        Completely blank (no tab) lines are ignored.
        If a "cell" is merely "." or "[space]", then it is treated as
        completely missing. That is, in the program:

        ```
        <tab:Test>
        1   2   3
        2   .   1   3
        3       1   2
        ```

        is equivalent to:
        ```
        <tab:Test>
        1   2   3
        2   1   3
        3   1   2
        ```

        This feature is useful as Stata outputs missing values in numerical
        variables as ".", and missing values in string variables as "[space]".

        ................................
         Scientific Notation Notes:
        ................................
        The scientific notation has to be of the form:
        [numbers].[numbers]e(+/-)[numbers]

        Examples:
        ```
        23.2389e+23
        -2.23e-2
        -0.922e+3
        ```

        ###########################
        Template LyX Format:
        ###########################

        The LyX template file determines where the numbers from the input
        files are placed.

        Every table in the template file (if it is to be filled) must appear
        within a float. There must be one, and only one, table object inside
        the float, and the table name must include a label object that
        corresponds to the label of the required table in the input file.

        Note that table names cannot be duplicated. For a single template
        file, each table to be filled must have a unique label, and there must
        be one, and only one, table with that same label in the text files
        used as input. Having multiple tables with the same name in the input
        files or in the template file will cause errors.

        Note also that labels are NOT case-sensitive. That is, <TAB:Table1> is
        considered the same as `<tab:table1>`.

        In the LyX tables, "cells" to be filled with entries from the input
        text files are indicated by the following tags:

        `"###"  (no quotes)`
        or
        `"#[number][,]#"  (no quotes)`

        The first case will result in a literal substitution. I.e. whatever is
        in the text tables for that cell will be copied over. The second case
        will convert the data table's number (if in scientific notation) and
        will truncate this converted number to [number] decimal places. It
        will automatically round while doing so.

        If a comma appears after the number (within #[number]#), then it will
        add commas to the digits to the left of the decimal place.

        Examples:
        ---------
        ```
        2309.2093 + ### = 2309.2093
        2309.2093 + #4# = 2309.2093
        2309.2093 + #5# = 2309.20930
        2309.2093 + #20# = 2309.20930000000000000000
        2309.2093 + #3# = 2309.209
        2309.2093 + #2# = 2309.21
        2309.2093 + #0# = 2309
        2309.2093 + #0,# = 2,309
        ```

        ```
        -2.23e-2  + #2# = -0.0223 + #2# = -0.02
        -2.23e-2  + #7# = -0.0223 + #7# = -0.0223000
        -2.23e+10  + #7,# = -22300000000 + #7,# = -22,300,000,000.000000
        ```

        Furthermore, only ###/#num# will be replaced, allowing you to put
        things around ###/#num# to alter the final output:

        Examples:
        --------

        ```
        2309.2093 + (#2#) = (2309.21)
        2309.2093 + #2#** = 2309.21**
        2309.2093 + ab#2#cd = ab2309.21cd
        ```

        If you are doing exact substitution, then you can use characters:

        Examples:
        ---------
        `abc + ### = abc`

        ................................
         Intentionally blank cells:
        ................................

        If you would like to display a blank cell, you can use "---":

        Examples:
        ---------
        ```
        --- + ### = ---
        --- + #3# = ---
        ```

        ######################
        # Example Combinations
        #   Of input + template
        ######################

        Example 1 (Simple)
        ----------
        ```
        Input: <tab:Test>
        1   2   3
        2   1   3
        3   1   2

        Template: `<tab:Test> ` (pretend this is what you see in LyX)
        ### ### ###
        ### ### ###
        ### ### ###

        Result:<tab:Test>
        1   2   3
        2   1   3
        3   1   2
        ```

        Example 2 (More Complicated)
        ----------
        ```
        Input: <tab:Test>
        1   .   3
        2e-5    1   3.023
        .   -1  2   3

        Template: <tab:Test>  (pretend this is what you see in LyX)
        (###)   2       ###
        #3#     ###     #1#
        NA      ###     ###     ###

        Result:<tab:Test>
        (1)     2       3
        0.000   1       3.0
        NA      -1      2       3
        ```

        ===================
        ====Important======
        ===================
        By design, missings in input table and "missings" in template do not
        have to line up.

        Example 3 (LyX)
        ----------
        ```
        Input: <tab:Test>
        1   .   3
        2e-5    .   3.023
        .   -1  2

        Template: <tab:Test>
        ###     ###     abc
        abc     #2#     #3#
        NA      ###     ###

        Result:<tab:Test>
        1       3       abc
        abc     0.00    3.023
        NA      -1      2

        Recall that to the program, the above input table is no different from:
        1   3
        2e-5    3.023
        -1  2
        ```

        It doesn't "know" where the numbers should be placed within a row,
        only what the next number to place should be.

        Similarly:

        Example 4 (LyX)
        ----------
        ```
        Input: <tab:Test>
        1   1   2
        1   1   3
        2   -1  2

        Template: <tab:Test>
        ###     ###     ###
        abc     abc     abc
        ###     #2#     #3#
        ###     ###     ###

        Result:<tab:Test>
        1       1       2
        abc     abc     abc
        1       1.00    3.000
        2       -1      2
        ```

        If a row in the template has no substitutions, then it's not really a
        row from the program's point of view.

        ######################
        # Error Logging
        ######################

        If an error occurs during the call to tablefill, it will be displayed
        in the command window. When make.py finishes, the user will be able to
        scroll up through the output and examine any error messages. Error
        messages, which include a description of the error type and a
        traceback to the line of code where the error occurred, can also be
        returned as a string object using the following syntax:

        exitmessage = tablefill( input = 'input_file(s)',
                                 template = 'template_file',
                                 output = 'output_file' )

        Lines can then be added to make.py to output this string to a log file
        using standard Python and built in gslab_make commands.

        ######################
        # Common Errors
        ######################

        Common mistakes which can lead to errors include:

        - Mismatch between the length of the LyX table and the corresponding
          text table. If the LyX table has more entries to be filled than the
          text table has entries to fill from, this will cause an error and
          the table will not be filled.

        - Use of numerical tags (e.g. #1#) to fill non-numerical data. This
          will cause an error. Non-numerical data can only be filled using
          "###", as it does not make sense to round or truncate this data.

        - Multiple table objects in the same float. Each table float in the
          template LyX file can only contain one table object. If a float
          contains a second table object, this table will not be filled.

        ######################
        # Boldfacing entries
        ######################

        It is straightforward to develop functions that conditionally write
        entries of tables in boldface; functions may do so by inserting
        '\series bold' in the lines of the filled LyX file immediately before
        phrases that the user wishes to make bold.
    """

    try:
        inputs = convert_to_list(inputs, 'file')
        inputs = [norm_path(path) for path in inputs]
        content = [_parse_content(path, null) for path in inputs]

        # Keyed by tag: a repeated tag collapses into one entry, which the
        # length comparison below detects.
        tables = {tag: data for (tag, data) in content}
        if len(content) != len(tables):
            raise_from(CritError(messages.crit_error_duplicate_tables), None)

        doc = _insert_tables(template, tables, null)

        with io.open(output, 'w', encoding='utf-8') as f:
            f.write(doc)
    except:
        error_message = 'Error with `tablefill`. Traceback can be found below.'
        error_message = format_message(error_message)
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def log_files_in_output(paths, depth = float('inf')):
    """.. Log files in output directory.

    Logs the following information for all files contained in directory
    ``output_dir``.

    - File name (in file ``output_statslog``)
    - Last modified (in file ``output_statslog``)
    - File size (in file ``output_statslog``)
    - File head (in file ``output_headslog``, optional)

    When walking through directory ``output_dir``, float ``depth`` determines
    level of depth to walk. Status messages are appended to file ``makelog``.

    Include additional output directories to walk through (typically
    directories that you wish to keep local) in directory list
    ``output_local_dir``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    depth : float, optional
        Level of depth when walking through output directory.
        Defaults to infinite.

    Path Keys
    ---------
    output_dir : str
        Path of output directory.
    output_local_dir : str, list, optional
        Path or list of paths of local output directories.
        Defaults to ``[]`` (i.e., none).
    output_statslog : str
        Path to write output statistics log.
    output_headslog : str, optional
        Path to write output headers log.
    makelog : str
        Path of makelog.

    Returns
    -------
    None

    Example
    -------
    The following code will log information for all files contained in only
    the first level of ``paths['output_dir']``. Therefore, files contained in
    subdirectories will be ignored.

    .. code-block:: python

        log_files_in_output(paths, depth = 1)

    The following code will log information for any file in
    ``paths['output_dir']``, regardless of level of subdirectory.

    .. code-block :: python

        log_files_in_output(paths, depth = float('inf'))
    """

    try:
        output_dir = get_path(paths, 'output_dir')
        local_dirs = get_path(paths, 'output_local_dir', throw_error = False)
        statslog = get_path(paths, 'output_statslog')
        headslog = get_path(paths, 'output_headslog', throw_error = False)

        local_dirs = convert_to_list(local_dirs, 'dir') if local_dirs else []

        found = glob_recursive(output_dir, depth)
        found_local = []
        for local_dir in local_dirs:
            found_local.extend(glob_recursive(local_dir, depth))

        # De-duplicate files reachable from more than one directory.
        output_files = set(found + found_local)

        if statslog:
            _write_stats_log(norm_path(statslog), output_files)

        if headslog:
            _write_heads_log(norm_path(headslog), output_files)

        message = 'Output logs successfully written!'
        write_to_makelog(paths, message)
        print(colored(message, metadata.color_success))
    except:
        error_message = format_message(
            'Error with `log_files_in_output`. Traceback can be found below.')
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def _files_over_limit(file_sizes, limit_MB):
    """Return newline-joined names of files whose size in MB exceeds ``limit_MB``."""

    # Divide by a float so the comparison never relies on integer division
    bytes_per_MB = float(1024 ** 2)
    return '\n'.join(
        name for (name, size) in file_sizes.items()
        if size / bytes_per_MB > limit_MB)


def check_module_size(paths):
    """.. Check file sizes for module.

    Checks file sizes for files to be committed in the current working
    directory. Compares file sizes to size limits in file ``config`` and
    produces warnings if any of the following limits are exceeded.

    - Individual size of a file tracked by git lfs (``file_MB_limit_lfs``)
    - Total size of all files tracked by git lfs (``total_MB_limit_lfs``)
    - Individual size of a file tracked by git (``file_MB_limit``)
    - Total size of all files tracked by git (``total_MB_limit``)

    Warnings are printed to the console and appended to file ``makelog``.
    The logged version of a per-file warning is followed by the names of the
    files that exceed the limit.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.

    Path Keys
    ---------
    config : str
        Path of project configuration file. The limits are read from its
        ``max_file_sizes`` entry.
    makelog : str
        Path of makelog.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        If any step fails. The traceback is also appended to ``makelog``.
    """

    try:
        git_files, git_lfs_files = _get_dir_sizes('.')
        file_MB, total_MB, file_MB_lfs, total_MB_lfs = _get_size_values(
            git_files, git_lfs_files)

        config = open_yaml(get_path(paths, 'config'))
        limits = config['max_file_sizes']
        file_limit = limits['file_MB_limit']
        total_limit = limits['total_MB_limit']
        file_limit_lfs = limits['file_MB_limit_lfs']
        total_limit_lfs = limits['total_MB_limit_lfs']

        # Build console and makelog warnings side by side; both keep the order
        # git file, git total, git lfs file, git lfs total
        print_parts = []
        log_parts = []

        if file_MB > file_limit:
            print_parts.append(messages.warning_git_file_print % file_limit)
            log_parts.append(messages.warning_git_file_log % file_limit)
            log_parts.append('\n' + _files_over_limit(git_files, file_limit))

        if total_MB > total_limit:
            warning = messages.warning_git_repo % total_limit
            print_parts.append(warning)
            log_parts.append(warning)

        if file_MB_lfs > file_limit_lfs:
            print_parts.append(messages.warning_git_lfs_file_print % file_limit_lfs)
            log_parts.append(messages.warning_git_lfs_file_log % file_limit_lfs)
            log_parts.append('\n' + _files_over_limit(git_lfs_files, file_limit_lfs))

        if total_MB_lfs > total_limit_lfs:
            warning = messages.warning_git_lfs_repo % total_limit_lfs
            print_parts.append(warning)
            log_parts.append(warning)

        print_message = ''.join(print_parts).strip()
        log_message = ''.join(log_parts).strip()

        if print_message:
            print(colored(print_message, metadata.color_failure))
        if log_message:
            write_to_makelog(paths, log_message)
    except:
        # Report under this function's real name (message used to say `check_repo_size`)
        error_message = 'Error with `check_module_size`. Traceback can be found below.'
        error_message = format_message(error_message)
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def execute_command(paths, command, **kwargs):
    """.. Run system command.

    Runs system command `command` with shell execution boolean ``shell``.
    Outputs are appended to file ``makelog`` and written to system command log
    file ``log``. Status messages are appended to file ``makelog``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    command : str
        System command to run.
    shell : `bool`, optional
        See `here <https://docs.python.org/3/library/subprocess.html#frequently-used-arguments>`_.
        Defaults to ``True``.
    log : str, optional
        Path of system command log. System command log is only written if
        specified. Defaults to ``''`` (i.e., not written).

    Path Keys
    ---------
    makelog : str
        Path of makelog.

    Note
    ----
    We recommend leaving all other parameters to their defaults.

    Other Parameters
    ----------------
    osname : str, optional
        Name of OS. Used to check if OS is supported. Defaults to ``os.name``.

    Returns
    -------
    None

    Example
    -------
    The following code executes the ``ls`` command, writes outputs to system
    command log file ``'file'``, and appends outputs and/or status messages to
    ``paths['makelog']``.

    .. code-block:: python

        execute_command(paths, 'ls', log = 'file')
    """

    try:
        directive = Directive(makelog=get_path(paths, 'makelog'), **kwargs)

        # Execute
        exit_code, stderr = directive.execute_command(command)
        directive.write_log()

        if exit_code == 0:
            return

        # Non-zero exit code: surface the command's stderr to the caller
        error_message = format_message(
            'Command executed with errors. Traceback can be found below.')
        raise_from(ProgramError(error_message, stderr), None)
    except ProgramError:
        # Command failures propagate unchanged
        raise
    except:
        error_message = format_message(
            'Error with `execute_command`. Traceback can be found below.')
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def link_inputs(paths, file_list):
    """.. Create symlinks to inputs from list of files containing linking instructions.

    Create symbolic links using instructions contained in files of list
    ``file_list``. Instructions are
    `string formatted <https://docs.python.org/3.4/library/string.html#format-string-syntax>`__
    using paths dictionary ``paths``. Symbolic links are written in directory
    ``input_dir``. Status messages are appended to file ``makelog``.

    Instruction files on how to create symbolic links (destinations) from
    targets (sources) should be formatted in the following way.

    .. code-block:: md

        # Each line of instruction should contain a destination and source delimited by a `|`
        # Lines beginning with # are ignored
        destination | source

    .. Note::
        Symbolic links can be created to both files and directories.

    .. Note::
        Instruction files can be specified with the * shell pattern
        (see `here <https://www.gnu.org/software/findutils/manual/html_node/find_html/Shell-Pattern-Matching.html>`__).
        Destinations and their sources can also be specified with the * shell
        pattern. The number of wildcards must be the same for both
        destinations and sources.

    .. Note::
        This function stores the value of ``input_dir`` in ``paths`` under
        the key ``move_dir``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below. Dictionary additionally used to string format linking
        instructions.
    file_list : str, list
        File or list of files containing linking instructions.

    Path Keys
    ---------
    input_dir : str
        Directory to write symlinks.
    makelog : str
        Path of makelog.

    Returns
    -------
    source_map : list
        List of (source, destination) for each symlink created.

    Example
    -------
    Suppose you call the following function.

    .. code-block:: python

        link_inputs(paths, ['file1'])

    Suppose ``paths`` contained the following values.

    .. code-block:: md

        paths = {'root': '/User/root/',
                 'makelog': 'make.log',
                 'input_dir': 'input'}

    Now suppose instruction file ``file1`` contained the following text.

    .. code-block:: md

        destination1 | {root}/source1

    The ``{root}`` in the instruction file would be string formatted using
    ``paths``. Therefore, the function would parse the instruction as:

    .. code-block:: md

        destination1 | /User/root/source1

    Example
    -------
    The following code would use instruction files ``file1`` and ``file2`` to
    create symbolic links.

    .. code-block:: python

        link_inputs(paths, ['file1', 'file2'])

    Suppose instruction file ``file1`` contained the following text.

    .. code-block:: md

        destination1 | source1
        destination2 | source2

    Symbolic links ``destination1`` and ``destination2`` would be created in
    directory ``paths['input_dir']``. Their targets would be ``source1`` and
    ``source2``, respectively.

    Example
    -------
    Suppose you have the following targets.

    .. code-block:: md

        source1
        source2
        source3

    Specifying ``destination* | source*`` in one of your instruction files
    would create the following symbolic links in ``paths['input_dir']``.

    .. code-block:: md

        destination1
        destination2
        destination3
    """

    try:
        # The link helper reads its target directory from `move_dir`
        paths['move_dir'] = get_path(paths, 'input_dir')
        created_links = _create_links(paths, file_list)

        success_message = 'Input links successfully created!'
        write_to_makelog(paths, success_message)
        print(colored(success_message, metadata.color_success))

        return created_links
    except:
        error_message = format_message(
            'An error was encountered with `link_inputs`. Traceback can be found below.')
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def run_latex(paths, program, **kwargs):
    """.. Run LaTeX script using system command.

    Compiles document ``program`` using system command, with document
    specified in the form of ``script.tex``. Status messages are appended to
    file ``makelog``. PDF outputs are written in directory ``output_dir``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    program : str
        Path of script to run.

    Path Keys
    ---------
    makelog : str
        Path of makelog.
    output_dir : str
        Directory to write PDFs.

    Note
    ----
    We recommend leaving all other parameters to their defaults.

    Note
    ----
    This function creates a directory named ``latex_auxiliary_dir`` in the
    current working directory and removes it again before returning, whether
    or not compilation succeeds. A directory of that name that already exists
    is treated as an error and is left untouched.

    Other Parameters
    ----------------
    osname : str, optional
        Name of OS. Used to determine syntax of system command. Defaults to
        ``os.name``.
    shell : `bool`, optional
        See `here <https://docs.python.org/3/library/subprocess.html#frequently-used-arguments>`_.
        Defaults to ``True``.
    log : str, optional
        Path of program log. Program log is only written if specified.
        Defaults to ``''`` (i.e., not written).
    executable : str, optional
        Executable to use for system command. Defaults to executable specified
        in :ref:`default settings<default settings>`.
    option : str, optional
        Options for system command. Defaults to options specified in
        :ref:`default settings<default settings>`.
    args : str, optional
        Not applicable.

    Returns
    -------
    None

    Raises
    ------
    ProgramError
        If the LaTeX command exits with a non-zero code.
    ColoredError
        If any other step fails. The traceback is also appended to
        ``makelog``.

    Example
    -------
    .. code-block:: python

        run_latex(paths, program = 'script.tex')
    """

    aux_dir = 'latex_auxiliary_dir'
    # Only a directory created by this call is removed in `finally`
    aux_dir_created = False

    try:
        makelog = get_path(paths, 'makelog')
        output_dir = get_path(paths, 'output_dir')
        direct = LyXDirective(output_dir=output_dir,
                              application='latex',
                              program=program,
                              makelog=makelog,
                              **kwargs)

        temp_name = direct.program_name
        temp_program = direct.program

        # Generate folder for auxiliary files
        os.mkdir(aux_dir)
        aux_dir_created = True

        # Execute
        command = metadata.commands[direct.osname][direct.application] % (
            direct.executable, direct.option, temp_program)
        exit_code, stderr = direct.execute_command(command)
        direct.write_log()
        if exit_code != 0:
            error_message = 'LaTeX program executed with errors. Traceback can be found below.'
            error_message = format_message(error_message)
            raise_from(ProgramError(error_message, stderr), None)

        # Move PDF output
        temp_pdf = os.path.join(aux_dir, temp_name + '.pdf')
        output_pdf = os.path.join(direct.output_dir, direct.program_name + '.pdf')

        if temp_pdf != output_pdf:
            shutil.copy2(temp_pdf, output_pdf)

        # Remove auxiliary files; a failure here is still reported as an error
        shutil.rmtree(aux_dir)
        aux_dir_created = False
    except ProgramError:
        raise
    except:
        error_message = 'Error with `run_latex`. Traceback can be found below.'
        error_message = format_message(error_message)
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
    finally:
        # Failure path: without this the stale directory would make the next
        # run fail at `os.mkdir`. Errors are ignored so the original exception
        # is the one that propagates.
        if aux_dir_created:
            shutil.rmtree(aux_dir, ignore_errors=True)
def run_lyx(paths, program, doctype='', **kwargs):
    """.. Run LyX script using system command.

    Compiles document ``program`` using system command, with document
    specified in the form of ``script.lyx``. Status messages are appended to
    file ``makelog``. PDF outputs are written in directory ``output_dir``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    program : str
        Path of script to run.
    doctype : str, optional
        Type of LyX document. Takes either ``'handout'`` or ``'comments'``.
        All other strings will default to standard document type. Defaults to
        ``''`` (i.e., standard document type).

    Path Keys
    ---------
    makelog : str
        Path of makelog.
    output_dir : str
        Directory to write PDFs.

    Note
    ----
    We recommend leaving all other parameters to their defaults.

    Note
    ----
    When a document type is in effect, a temporary copy of the document named
    ``<program name>_<doctype>.lyx`` is written next to ``program`` and
    compiled instead of the original. The copy and its ``.bak`` backup are
    removed before returning, whether or not compilation succeeds.

    Other Parameters
    ----------------
    osname : str, optional
        Name of OS. Used to determine syntax of system command. Defaults to
        ``os.name``.
    shell : `bool`, optional
        See `here <https://docs.python.org/3/library/subprocess.html#frequently-used-arguments>`_.
        Defaults to ``True``.
    log : str, optional
        Path of program log. Program log is only written if specified.
        Defaults to ``''`` (i.e., not written).
    executable : str, optional
        Executable to use for system command. Defaults to executable specified
        in :ref:`default settings<default settings>`.
    option : str, optional
        Options for system command. Defaults to options specified in
        :ref:`default settings<default settings>`.
    args : str, optional
        Not applicable.

    Returns
    -------
    None

    Raises
    ------
    ProgramError
        If the LyX command exits with a non-zero code.
    ColoredError
        If any other step fails. The traceback is also appended to
        ``makelog``.

    Example
    -------
    .. code-block:: python

        run_lyx(paths, program = 'script.lyx')
    """

    # Temporary handout/comments files to delete on exit, whatever the outcome
    temp_files = []

    try:
        makelog = get_path(paths, 'makelog')
        output_dir = get_path(paths, 'output_dir')
        direct = LyXDirective(output_dir=output_dir,
                              doctype=doctype,
                              application='lyx',
                              program=program,
                              makelog=makelog,
                              **kwargs)

        # Make handout/comments LyX file
        if direct.doctype:
            temp_name = direct.program_name + '_' + direct.doctype
            temp_program = os.path.join(direct.program_dir, temp_name + '.lyx')
            # `fileinput` keeps the backup file when `backup` is given explicitly
            temp_files = [temp_program, temp_program + '.bak']

            beamer = False
            shutil.copy2(direct.program, temp_program)

            try:
                # With `inplace=True`, standard output is redirected into the file
                for line in fileinput.input(temp_program, inplace=True, backup='.bak'):
                    if r'\textclass beamer' in line:
                        beamer = True
                    if direct.doctype == 'handout' and beamer and (r'\options' in line):
                        line = line.rstrip('\n') + ', handout\n'
                    elif direct.doctype == 'comments' and (r'\begin_inset Note Note' in line):
                        line = line.replace('Note Note', 'Note Greyedout')

                    # Each line already ends with its own newline; `print`
                    # would append a second one to every line of the document
                    sys.stdout.write(line)
            finally:
                # Restore standard output even if the rewrite is interrupted
                fileinput.close()
        else:
            temp_name = direct.program_name
            temp_program = direct.program

        # Execute
        command = metadata.commands[direct.osname][direct.application] % (
            direct.executable, direct.option, temp_program)
        exit_code, stderr = direct.execute_command(command)
        direct.write_log()
        if exit_code != 0:
            error_message = 'LyX program executed with errors. Traceback can be found below.'
            error_message = format_message(error_message)
            raise_from(ProgramError(error_message, stderr), None)

        # Move PDF output
        temp_pdf = os.path.join(direct.program_dir, temp_name + '.pdf')
        output_pdf = os.path.join(direct.output_dir, direct.program_name + '.pdf')

        if temp_pdf != output_pdf:
            shutil.copy2(temp_pdf, output_pdf)
            os.remove(temp_pdf)
    except ProgramError:
        raise
    except:
        error_message = 'Error with `run_lyx`. Traceback can be found below.'
        error_message = format_message(error_message)
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
    finally:
        # Remove handout/comments LyX file and its backup
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except OSError:
                # Best-effort: the file may never have been created, and a
                # cleanup failure must not replace the error being reported
                pass
def run_stata(paths, program, **kwargs):
    """.. Run Stata script using system command.

    Runs script ``program`` using system command, with script specified in the
    form of ``script.do``. Status messages are appended to file ``makelog``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    program : str
        Path of script to run.

    Path Keys
    ---------
    makelog : str
        Path of makelog.

    Note
    ----
    We recommend leaving all other parameters to their defaults.

    Note
    ----
    When a do-file contains a space in its name, different versions of Stata
    save the corresponding log file with different names. Some versions of
    Stata truncate the name to everything before the first space of the
    do-file name. The truncated log name is therefore tried first and the
    full log name second.

    Other Parameters
    ----------------
    osname : str, optional
        Name of OS. Used to determine syntax of system command. Defaults to
        ``os.name``.
    shell : `bool`, optional
        See `here <https://docs.python.org/3/library/subprocess.html#frequently-used-arguments>`_.
        Defaults to ``True``.
    log : str, optional
        Path of program log. Program log is only written if specified.
        Defaults to ``''`` (i.e., not written).
    executable : str, optional
        Executable to use for system command. Defaults to executable specified
        in :ref:`default settings<default settings>`.
    option : str, optional
        Options for system command. Defaults to options specified in
        :ref:`default settings<default settings>`.
    args : str, optional
        Not applicable.

    Returns
    -------
    None

    Example
    -------
    .. code-block:: python

        run_stata(paths, program = 'script.do')
    """

    try:
        directive = ProgramDirective(application='stata',
                                     program=program,
                                     makelog=get_path(paths, 'makelog'),
                                     **kwargs)
        working_dir = os.getcwd()

        # Get program output (partial): log name cut at the first space
        truncated_path = directive.program.split(" ")[0]
        truncated_stem = os.path.splitext(os.path.basename(truncated_path))[0]
        log_partial = os.path.join(working_dir, truncated_stem + '.log')

        # Get program output (full)
        log_full = os.path.join(working_dir, directive.program_name + '.log')

        # Sanitize program
        if directive.osname == "posix":
            directive.program = re.escape(directive.program)

        # Execute
        command_template = metadata.commands[directive.osname]['stata']
        command = command_template % (
            directive.executable, directive.option, directive.program)
        exit_code, stderr = directive.execute_command(command)
        if exit_code != 0:
            error_message = format_message(
                'Stata program executed with errors. Traceback can be found below.')
            raise_from(ProgramError(error_message, stderr), None)

        # Try the truncated log name first, then fall back to the full one
        try:
            output = directive.move_program_output(log_partial, directive.log)
        except:
            output = directive.move_program_output(log_full, directive.log)
        _check_stata_output(output)
    except ProgramError:
        raise
    except:
        error_message = format_message(
            'Error with `run_stata`. Traceback can be found below.')
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def run_stat_transfer(paths, program, **kwargs):
    """.. Run StatTransfer script using system command.

    Runs script ``program`` using system command, with script specified in the
    form of ``script.stc`` or ``script.stcmd``. Status messages are appended
    to file ``makelog``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    program : str
        Path of script to run.

    Path Keys
    ---------
    makelog : str
        Path of makelog.

    Note
    ----
    We recommend leaving all other parameters to their defaults.

    Other Parameters
    ----------------
    osname : str, optional
        Name of OS. Used to determine syntax of system command. Defaults to
        ``os.name``.
    shell : `bool`, optional
        See `here <https://docs.python.org/3/library/subprocess.html#frequently-used-arguments>`_.
        Defaults to ``True``.
    log : str, optional
        Path of program log. Program log is only written if specified.
        Defaults to ``''`` (i.e., not written).
    executable : str, optional
        Executable to use for system command. Defaults to executable specified
        in :ref:`default settings<default settings>`.
    option : str, optional
        Options for system command. Defaults to options specified in
        :ref:`default settings<default settings>`.
    args : str, optional
        Not applicable.

    Returns
    -------
    None

    Example
    -------
    .. code-block:: python

        run_stat_transfer(paths, program = 'script.stc')
    """

    try:
        directive = ProgramDirective(application='st',
                                     program=program,
                                     makelog=get_path(paths, 'makelog'),
                                     **kwargs)

        # Execute
        command_template = metadata.commands[directive.osname][directive.application]
        command = command_template % (directive.executable, directive.program)
        exit_code, stderr = directive.execute_command(command)
        directive.write_log()

        if exit_code != 0:
            error_message = format_message(
                'StatTransfer program executed with errors. Traceback can be found below.')
            raise_from(ProgramError(error_message, stderr), None)
    except ProgramError:
        # Program failures propagate unchanged
        raise
    except:
        error_message = format_message(
            'Error with `run_stat_transfer`. Traceback can be found below.')
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
def run_jupyter(paths, program, timeout=None, kernel_name=''):
    """.. Run Jupyter notebook using Python API.

    Runs notebook ``program`` using Python API, with notebook specified in the
    form of ``notebook.ipynb``. The executed notebook is written back to
    ``program``, overwriting the original file. Status messages are appended
    to file ``makelog``.

    Parameters
    ----------
    paths : dict
        Dictionary of paths. Dictionary should contain values for all keys
        listed below.
    program : str
        Path of script to run.

    Path Keys
    ---------
    makelog : str
        Path of makelog.

    Note
    ----
    We recommend leaving all other parameters to their defaults.

    Other Parameters
    ----------------
    timeout : int, optional
        Time to wait (in seconds) to finish executing a cell before raising
        exception. Defaults to no timeout.
    kernel_name : str, optional
        Name of kernel to use for execution (e.g., ``python2`` for standard
        Python 2 kernel, ``python3`` for standard Python 3 kernel). Defaults
        to ``''``, in which case ``python<N>`` is used, where ``<N>`` is the
        major version of the Python interpreter running this function.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        If any step fails. The traceback is also appended to ``makelog``.

    Example
    -------
    .. code-block:: python

        run_jupyter(paths, program = 'notebook.ipynb')
    """

    # `io.open` accepts `encoding` on both Python 2 and Python 3
    import io

    try:
        program = norm_path(program)

        # Notebook files are UTF-8 JSON; do not rely on the platform default
        with io.open(program, encoding='utf-8') as f:
            message = 'Processing notebook: `%s`' % program
            write_to_makelog(paths, message)
            print(colored(message, 'cyan'))

            nb = nbformat.read(f, as_version=4)

        # The read handle is closed before the notebook is executed and rewritten
        if not kernel_name:
            kernel_name = 'python%s' % sys.version_info[0]
        ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name)
        ep.preprocess(nb, {'metadata': {'path': '.'}})

        with io.open(program, 'wt', encoding='utf-8') as f:
            nbformat.write(nb, f)
    except:
        error_message = 'Error with `run_jupyter`. Traceback can be found below.'
        error_message = format_message(error_message)
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)