예제 #1
0
def clear_dir(dir_list):
    """.. Clear directory. Create directory if nonexistent.

    Clears all directories in list ``dir_list``. Each entry is expanded with
    ``glob.glob``, the resulting directories are removed via ``remove_dir``,
    and every one of them is then recreated empty. Directories can be
    specified with shell patterns
    (see `here <https://www.gnu.org/software/findutils/manual/html_node/find_html/Shell-Pattern-Matching.html>`__).

    Note
    ----
    To clear a directory means to remove all contents of a directory.
    If the directory is nonexistent, the directory is created,
    unless the directory is specified via shell pattern: a pattern
    (an entry containing ``*``, ``?`` or ``[``) that matches nothing is skipped.
    A directory that is reached by more than one entry is cleared only once.

    Parameters
    ----------
    dir_list : str, list
        Directory or list of directories to clear.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        Raised for any failure inside the function; carries the formatted
        traceback of the underlying error.

    Example
    -------
    The following code clears directories ``dir1`` and ``dir2``.

    .. code-block:: python

        clear_dir(['dir1', 'dir2'])

    The following code clears directories beginning with ``dir``.

    .. code-block:: python

        clear_dir(['dir*'])
    """

    try:
        dir_list = convert_to_list(dir_list, 'dir')
        dir_glob = []
        seen = set()

        for dir_path in dir_list:
            expand = glob.glob(dir_path)
            if not expand:
                # An unmatched wildcard pattern names no directory; creating a
                # directory literally called e.g. ``dir*`` is never intended.
                if any(char in dir_path for char in '*?['):
                    continue
                expand = [dir_path]

            for path in expand:
                # Deduplicate on the normalized path so that overlapping
                # entries (``dir1`` and ``dir*``) do not make ``os.makedirs``
                # fail on a directory that was just recreated.
                key = os.path.normpath(path)
                if key not in seen:
                    seen.add(key)
                    dir_glob.append(path)

        if dir_glob:
            remove_dir(dir_glob, quiet = True)

        for dir_path in dir_glob:
            os.makedirs(dir_path)
            message = 'Cleared: `%s`' % dir_path
            print(colored(message, metadata.color_success))
    except:
        # Catch-all: every failure is re-raised as ColoredError together with
        # the traceback of the original error.
        error_message = 'Error with `clear_dir`. Traceback can be found below.' 
        error_message = format_message(error_message) 
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
예제 #2
0
def remove_dir(dir_list, quiet = False):
    """.. Remove directory using system command.

    Normalizes every entry of ``dir_list`` with ``norm_path``, expands it with
    ``glob.glob`` and hands each matching directory to ``remove_path``.
    Entries that match nothing are ignored. Directories can be specified with
    the * shell pattern
    (see `here <https://www.gnu.org/software/findutils/manual/html_node/find_html/Shell-Pattern-Matching.html>`__).

    Parameters
    ----------
    dir_list : str, list
        Directory or list of directories to remove.
    quiet : bool, optional
        Forwarded to ``remove_path`` to suppress printing of directories
        removed. Defaults to ``False``.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        Raised for any failure inside the function, including a matched path
        that is a regular file rather than a directory; carries the formatted
        traceback of the underlying error.

    Example
    -------
    The following code removes directories ``dir1`` and ``dir2``.

    .. code-block:: python

        remove_dir(['dir1', 'dir2'])

    The following code removes directories beginning with ``dir1``.

    .. code-block:: python

        remove_dir(['dir1*'])
    """

    try:
        patterns = [norm_path(entry) for entry in convert_to_list(dir_list, 'dir')]

        # Expand every pattern up front so removal works on a fixed list.
        matches = []
        for pattern in patterns:
            matches.extend(glob.glob(pattern))

        for match in matches:
            if os.path.isdir(match):
                remove_path(match, quiet = quiet)
                continue
            if os.path.isfile(match):
                # A regular file is not a valid target for this function.
                raise_from(TypeError(messages.type_error_not_dir % match), None)
    except:
        # Catch-all: every failure is re-raised as ColoredError together with
        # the traceback of the original error.
        error_message = format_message(
            'Error with `remove_dir`. Traceback can be found below.'
        )
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
예제 #3
0
    def parse_file_list(self):
        """Parse wildcards in list of files.

        Converts ``self.file_list`` to a list, expands every entry with
        ``glob.glob`` and stores the matched paths back on ``self.file_list``.

        Returns
        -------
        None

        Raises
        ------
        CritError
            If none of the entries in ``self.file_list`` matches a path.
        """

        # Read the patterns from the instance: this method has no parameter
        # or local named ``file_list``, so the bare name cannot be resolved here.
        self.file_list = convert_to_list(self.file_list, 'file')

        file_list_parsed = [
            match for pattern in self.file_list for match in glob.glob(pattern)
        ]
        if not file_list_parsed:
            # Report the unexpanded patterns so the caller sees what was asked for.
            error_list = [str(f) for f in self.file_list]
            raise CritError(messages.crit_error_no_files % error_list)

        self.file_list = file_list_parsed
예제 #4
0
def tablefill(inputs, template, output, null='.'):
    r""".. Fill tables for template using inputs.
    
    Fills tables in document ``template`` using files in list ``inputs``. 
    Writes filled document to file ``output``. 
    Null characters in ``inputs`` are replaced with value ``null``.

    Parameters
    ----------
    inputs : str, list
        Input or list of inputs to fill into template.
    template : str
        Path of template to fill.
    output : str
        Path of output.
    null : str
        Value to replace null characters (i.e., ``''``, ``'.'``, ``'NA'``). Defaults to ``'.'``.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        Raised for any failure inside the function, including two inputs 
        that carry the same table label; carries the formatted traceback 
        of the underlying error.
    
    Example
    -------

    .. code-block::

        #################################################################
        #  tablefill_readme.txt - Help/Documentation for tablefill.py
        #################################################################

        Description:
        tablefill.py is a Python module designed to fill LyX/Tex tables with output 
        from text files (usually output from Stata or Matlab).

        Usage:
        Tablefill takes as input a LyX (or Tex) file containing empty tables (the template 
        file) and text files containing data to be copied to these tables (the 
        input files), and produces a LyX (or Tex) file with filled tables (the output file). 
        For brevity, LyX will be used to denote LyX or Tex files throughout.

        Tablefill must first be imported to make.py.  This is typically achieved 
        by including the following lines:

        ```
        from gslab_fill.tablefill import tablefill
        ```

        Once the module has been imported, the syntax used to call tablefill is 
        as follows:

        ```
        tablefill(inputs = 'input_file(s)', template = 'template_file', 
                  output = 'output_file')
        ```

        The argument 'template' is the user written LyX file which contains the 
        tables to be filled in. The argument 'inputs' is the text file (or list of 
        text files) containing the output to be copied to the LyX tables. If there 
        are multiple input text files, they are passed as a list: 
        inputs = ['input_file_1', 'input_file_2']. 
        The argument 'output' is the name of the filled LyX file to be produced.  
        Note that this file is created by tablefill.py and should not be edited 
        manually by the user.

        ###########################
        Input File Format:
        ###########################

        The data needs to be tab-delimited rows of numbers (or characters), 
        preceded by  `<label>`.  The < and > are mandatory. The numbers can be 
        arbitrarily long, can be negative, and can also be in scientific notation.

        Examples:
        ----------

        ```
        <tab:Test>
        1   2   3
        2   3   1
        3   1   2
        ```

        ```
        <tab:FunnyMat>
        1   2   3   23  2
        2   3
        3   1   2   2
        1
        ```
        (The rows do not need to be of equal length.)

        Completely blank (no tab) lines are ignored.
        If a "cell" is merely "." or "[space]", then it is treated as completely 
        missing. That is, in the program:

        ```
        <tab:Test>
        1   2   3
        2   .   1   3
        3       1   2
        ```

        is equivalent to:
        ```
        <tab:Test>
        1   2   3
        2   1   3
        3   1   2
        ```

        This feature is useful as Stata outputs missing values in numerical 
        variables as ".", and missing values in string variables as "[space]".

        ................................
         Scientific Notation Notes:
        ................................
        The scientific notation has to be of the form:
        [numbers].[numbers]e(+/-)[numbers]

        Examples:
        ```
        23.2389e+23
        -2.23e-2
        -0.922e+3
        ```

        ###########################
        Template LyX Format:
        ###########################

        The LyX template file determines where the numbers from the input files are placed.

        Every table in the template file (if it is to be filled) must appear within a float. 
        There must be one, and only one, table object inside the float, and the table name 
        must include a label object that corresponds to the label of the required table in 
        the input file.

        Note that table names cannot be duplicated.  For a single template file, each table 
        to be filled must have a unique label, and there must be one, and only one, table with 
        that same label in the text files used as input. Having multiple tables with the
        same name in the input files or in the template file will cause errors.  

        Note also that labels are NOT case-sensitive. That is, <TAB:Table1> is considered
        the same as `<tab:table1>`.

        In the LyX tables, "cells" to be filled with entries from the input text files are 
        indicated by the following tags: 

        `"###"  (no quotes)`
        or 
        `"#[number][,]#"  (no quotes)`

        The first case will result in a literal substitution.  I.e. whatever is in the text 
        tables for that cell will be copied over. The second case will convert the data 
        table's number (if in scientific notation) and will truncate this converted number 
        to [number] decimal places.  It will automatically round while doing so.

        If a comma appears after the number (within #[number]#), then it will add commas 
        to the digits to the left of the decimal place.

        Examples:
        ---------
        ```
        2309.2093 + ### = 2309.2093
        2309.2093 + #4# = 2309.2093
        2309.2093 + #5# = 2309.20930
        2309.2093 + #20# = 2309.20930000000000000000
        2309.2093 + #3# = 2309.209
        2309.2093 + #2# = 2309.21
        2309.2093 + #0# = 2309
        2309.2093 + #0,# = 2,309
        ```

        ```
        -2.23e-2  + #2# = -0.0223 + #2# = -0.02
        -2.23e-2  + #7# = -0.0223 + #7# = -0.0223000
        -2.23e+10  + #7,# = -22300000000 + #7,# = -22,300,000,000.0000000
        ```

        Furthermore, only ###/#num# will be replaced, allowing you to put things around 
        ###/#num# to alter the final output:

        Examples:
        --------

        ```
        2309.2093 + (#2#) = (2309.21)
        2309.2093 + #2#** = 2309.21**
        2309.2093 + ab#2#cd = ab2309.21cd
        ```

        If you are doing exact substitution, then you can use characters:

        Examples:
        ---------
        `abc + ### = abc`

        ................................
         Intentionally blank cells:
        ................................

        If you would like to display a blank cell, you can use "---":

        Examples:
        ---------
        ```
        --- + ### = ---
        --- + #3# = ---
        ```

        ######################
        # Example Combinations 
        #   Of input + template
        ######################


        Example 1 (Simple)
        ----------
        ```
        Input: <tab:Test>
        1   2   3
        2   1   3
        3   1   2

        Template: `<tab:Test> ` (pretend this is what you see in LyX)

        ### ### ###
        ### ### ###
        ### ### ###

        Result:<tab:Test>
        1   2   3
        2   1   3
        3   1   2
        ```

        Example 2 (More Complicated)
        ----------
        ```
        Input: <tab:Test>
        1   .   3
        2e-5    1   3.023
        .   -1  2   3

        Template: <tab:Test>  (pretend this is what you see in LyX)
        (###)   2   ###
        #3# ### #1#
        NA  ### ### ###

        Result:<tab:Test>
        (1) 2   3
        0.000   1   3.0
        NA  -1  2   3
        ```

        ===================
        ====Important======
        ===================
        By design, missings in input table and "missings" in template do not have to 
        line up.

        Example 3 (LyX)
        ----------
        ```
        Input: <tab:Test>
        1   .   3
        2e-5    .   3.023
        .   -1  2

        Template: <tab:Test> 
        ### ### abc
        abc #2# #3#
        NA  ### ###

        Result:<tab:Test>
        1   3   abc
        abc 0.00    3.023
        NA  -1  2

        Recall that to the program, the above input table is no different from:
        1   3
        2e-5    3.023
        -1  2
        ```

        It doesn't "know" where the numbers should be placed within a row, only what 
        the next number to place should be.

        Similarly:

        Example 4 (LyX)
        ----------
        ```
        Input: <tab:Test>
        1   1   2
        1   1   3
        2   -1  2

        Template: <tab:Test>  
        ### ### ###
        abc abc abc
        ### #2# #3#
        ### ### ###

        Result:<tab:Test>
        1   1   2
        abc abc abc
        1   1.00    3.000
        2   -1  2
        ```

        If a row in the template has no substitutions, then it's not really a row from 
        the program's point of view.


        ######################
        # Error Logging
        ######################

        If an error occurs during the call to tablefill, it is raised as a 
        `ColoredError` whose message includes a description of the error and a 
        traceback to the line of code where the error occurred. tablefill itself 
        returns nothing, so the error must be caught to be inspected:

        ```
        try:
            tablefill(inputs = 'input_file(s)', template = 'template_file', 
                      output = 'output_file')
        except Exception as error:
            exitmessage = str(error)
        ```

        Lines can then be added to make.py to output this string to a log file using 
        standard Python and built in gslab_make commands.


        ######################
        # Common Errors
        ######################

        Common mistakes which can lead to errors include:

        - Mismatch between the length of the LyX table and the corresponding text table.  
        If the LyX table has more entries to be filled than the text table has entries to
        fill from, this will cause an error and the table will not be filled.

        - Use of numerical tags (e.g. #1#) to fill non-numerical data.  This will cause 
        an error. Non-numerical data can only be filled using "###", as it does not make 
        sense to round or truncate this data.

        - Multiple table objects in the same float.  Each table float in the template LyX 
        file can only contain one table object.  If a float contains a second table object, 
        this table will not be filled.


        ######################
        # Boldfacing entries
        ######################

        It is straightforward to develop functions that conditionally write entries of 
        tables in boldface; functions may do so by inserting '\series bold' in the lines
        of the filled LyX file immediately before phrases that the user wishes to make bold.
    """

    try:
        inputs = convert_to_list(inputs, 'file')
        inputs = [norm_path(path) for path in inputs]
        content = [_parse_content(path, null) for path in inputs]
        tables = {tag: data for (tag, data) in content}
        # Repeated tags collapse into a single dictionary key, so a length
        # mismatch means at least two inputs share the same table label.
        if (len(content) != len(tables)):
            raise_from(CritError(messages.crit_error_duplicate_tables), None)

        doc = _insert_tables(template, tables, null)

        # The output is only opened once the filled document has been built.
        with io.open(output, 'w', encoding='utf-8') as f:
            f.write(doc)
    except:
        # Catch-all: every failure is re-raised as ColoredError together with
        # the traceback of the original error.
        error_message = 'Error with `tablefill`. Traceback can be found below.'
        error_message = format_message(error_message)
        raise_from(ColoredError(error_message, traceback.format_exc()), None)
예제 #5
0
def log_files_in_output(paths,
                        depth = float('inf')):
    """.. Log files in output directory.

    Collects the files found under ``paths['output_dir']`` (and under any
    directories listed in ``paths['output_local_dir']``) with
    ``glob_recursive`` and writes logs about them:

    - Output statistics log, written to ``output_statslog``
    - Output headers log, written to ``output_headslog`` (optional)

    Float ``depth`` is passed to ``glob_recursive`` and determines the level
    of depth to walk. Status messages are appended to file ``makelog``.

    Parameters
    ----------
    paths : dict 
        Dictionary of paths. Dictionary should contain values for all keys listed below.
    depth : float, optional
        Level of depth when walking through output directory. Defaults to infinite.

    Path Keys
    ---------
    output_dir : str
       Path of output directory.
    output_local_dir : str, list, optional
       Path or list of paths of local output directories. Defaults to ``[]`` (i.e., none).
    output_statslog : str
       Path to write output statistics log.
    output_headslog : str, optional
       Path to write output headers log.
    makelog : str
       Path of makelog.

    Returns
    -------
    None

    Raises
    ------
    ColoredError
        Raised for any failure inside the function; the error message and
        traceback are also written to the makelog.

    Example
    -------
    The following code will log information for all files contained in 
    only the first level of ``paths['output_dir']``. 
    Therefore, files contained in subdirectories will be ignored.

    .. code-block:: python

        log_files_in_output(paths, depth = 1)

    The following code will log information for any file in ``paths['output_dir']``, 
    regardless of level of subdirectory.

    .. code-block:: python

        log_files_in_output(paths, depth = float('inf'))
    """

    try:
        output_dir = get_path(paths, 'output_dir')
        output_local_dir = get_path(paths, 'output_local_dir', throw_error = False)
        output_statslog = get_path(paths, 'output_statslog')
        output_headslog = get_path(paths, 'output_headslog', throw_error = False)

        # A missing or empty local-directory entry means no extra directories.
        local_dirs = convert_to_list(output_local_dir, 'dir') if output_local_dir else []

        main_files = glob_recursive(output_dir, depth)
        local_files = []
        for local_dir in local_dirs:
            local_files.extend(glob_recursive(local_dir, depth))

        # A set drops files reached through more than one directory.
        all_files = set(main_files + local_files)

        if output_statslog:
            _write_stats_log(norm_path(output_statslog), all_files)

        if output_headslog:
            _write_heads_log(norm_path(output_headslog), all_files)

        message = 'Output logs successfully written!'
        write_to_makelog(paths, message)
        print(colored(message, metadata.color_success))  
    except:
        # Catch-all: the failure is recorded in the makelog and then
        # re-raised as ColoredError together with the traceback.
        error_message = format_message(
            'Error with `log_files_in_output`. Traceback can be found below.'
        )
        write_to_makelog(paths, error_message + '\n\n' + traceback.format_exc())
        raise_from(ColoredError(error_message, traceback.format_exc()), None)