def __init__(self, fileinstructions, study_info=None, logfile=None):
     '''Initialization of the stats computation.

     Parameters
     ----------
     fileinstructions :
         instructions source handed to `parse_instructions_file`; it is also
         stored unchanged in `self.fileinstructions`.
     study_info : dict, optional
         general information of the study. When omitted a new empty dict is
         created for this instance.
     logfile :
         value passed to the `Logger` constructor.

     '''
     describ_info = parse_instructions_file(fileinstructions)
     self.fileinstructions = fileinstructions
     self.info = describ_info
     # No stats are available until they are computed.
     self.stats = None
     # Build the dict here instead of in the signature: a `{}` default is
     # created once and would be shared by every instance relying on it.
     self.study_info = {} if study_info is None else study_info
     self.logfile = Logger(logfile)
Beispiel #2
0
    def __init__(self, logfile, pathfolder, precomputers=None, num_cores=None):
        """Set up the paths, the number of cores, the logger and precomputers.

        Parameters
        ----------
        logfile : str or logger object
            a str is wrapped into a `Logger`; any other object is stored as
            it is given.
        pathfolder : str
            base folder. The data path is set to `<pathfolder>/Cleaned/Results`.
        precomputers : optional
            object stored in `self.precomputers`. If None, a
            `PrecomputerCollection` is created from `logfile` and `pathfolder`
            with `old_computed=True`.
        num_cores : int, optional
            number of cores to use. None selects `multiprocessing.cpu_count()`
            and 0 is replaced by 1; any other value is stored unchanged.

        """
        self._initialization()
        self._initialization_spec()
        self.pathfolder = pathfolder
        self.pathdata = os.path.join(pathfolder, 'Cleaned/Results')

        if num_cores is None:
            self.num_cores = multiprocessing.cpu_count()
        elif num_cores == 0:
            self.num_cores = 1
        else:
            self.num_cores = num_cores

        # isinstance also accepts subclasses of str, which an exact type
        # comparison would wrongly treat as an already-built logger.
        self.logfile = Logger(logfile) if isinstance(logfile, str) else logfile
        if precomputers is not None:
            self.precomputers = precomputers
        else:
            self.precomputers = PrecomputerCollection(logfile,
                                                      pathfolder,
                                                      old_computed=True)
class Statistics():
    """The object which performs the computation of the statistics.

    The instructions are parsed at construction with
    `parse_instructions_file`. `compute_stats` computes one entry per row of
    the parsed instructions and `to_latex` renders the stored results with
    `describe2latex`.

    Attributes
    ----------
    fileinstructions :
        the instructions source given at construction.
    info :
        parsed instructions, iterated row by row (presumably a pandas
        DataFrame with a 'variables' column -- confirm against
        `parse_instructions_file`).
    stats : list or None
        result of the last `compute_stats` call; None before the first call.
    study_info : dict
        general information of the study. `compute_stats` stores the
        null/non-null counts in it under the key 'global_stats'.
    logfile :
        `Logger` built from the `logfile` argument, used to track the process.

    TODO
    ----
    Check if the variables in the info are in the dataframe and act in
    consequence.
    create plots function to create from stats the plots.
    - Transform this class as a Processer

    """

    def __init__(self, fileinstructions, study_info=None, logfile=None):
        '''Initialization of the stats computation.

        Parameters
        ----------
        fileinstructions :
            instructions source handed to `parse_instructions_file`.
        study_info : dict, optional
            general information of the study. When omitted a new empty dict
            is created for this instance.
        logfile :
            value passed to the `Logger` constructor.

        '''
        describ_info = parse_instructions_file(fileinstructions)
        self.fileinstructions = fileinstructions
        self.info = describ_info
        self.stats = None
        # compute_stats writes into study_info, so a `{}` default in the
        # signature would leak the results of one instance into the others.
        self.study_info = {} if study_info is None else study_info
        self.logfile = Logger(logfile)

    @staticmethod
    def _null_summary(dataframe):
        '''Count the non-null and null entries of each column of dataframe.

        Returns a DataFrame indexed by the columns of `dataframe` with the
        two columns 'non-null' and 'null'.
        '''
        nonnull = dataframe.notnull().sum()
        # Build from a dict of Series so that 'non-null' and 'null' are real
        # columns. Passing a list of Series together with `columns=` reindexes
        # onto labels that do not exist and yields only NaN.
        return pd.DataFrame({'non-null': nonnull,
                             'null': dataframe.shape[0] - nonnull},
                            columns=['non-null', 'null'])

    def compute_stats(self, dataframe, info=None):
        '''Function to compute the statistics for all the columns.

        Parameters
        ----------
        dataframe : pd.DataFrame
            the data to describe.
        info : optional
            instructions replacing the ones parsed at construction. When
            given, they are also kept in `self.info` for later calls.

        Returns
        -------
        stats : list
            one result of the module-level `compute_stats` per row of the
            instructions. It is also stored in `self.stats`.

        '''
        ## 0. Prepare inputs
        self.info = self.info if info is None else info
        # Tracking process with logfile
        t00 = time.time()
        self.logfile.write_log(message0 % self.fileinstructions)
        self.logfile.write_log(message1)
        ## 1. Compute stats
        stats = []
        # Iterate over the rows themselves: using the index labels as
        # positions is only correct for a default 0..n-1 index.
        for _, row in self.info.iterrows():
            info_var = dict(row)
            # Tracking process with logfile
            t0 = time.time()
            self.logfile.write_log(message1a % info_var['variables'])
            # Computing stats of the current variable
            stats.append(compute_stats(dataframe, info_var))
            # Stop tracking the process
            self.logfile.write_log(message2 % (time.time()-t0))
        ## 2. Save and return
        self.stats = stats
        # TODO: Order by column order!!!
        self.study_info['global_stats'] = self._null_summary(dataframe)
        # Stop tracking the process
        self.logfile.write_log(message3 % (time.time()-t00))
        self.logfile.write_log(message_close)
        return stats

    def to_latex(self, filepath=None):
        '''Transform the computed stats with `describe2latex`.

        Parameters
        ----------
        filepath : str, optional
            file in which the document is written. If None the document is
            returned instead of written.

        Returns
        -------
        doc :
            the output of `describe2latex` when `filepath` is None, otherwise
            None.

        '''
        ## Tracking the process
        t0 = time.time()
        self.logfile.write_log(message1b)
        ## 1. Compute transformation
        doc = describe2latex(self.study_info, self.stats)
        ## 2. Write output
        if filepath is not None:
            # The with statement closes the file, no explicit close needed.
            with open(filepath, 'w') as myfile:
                myfile.write(doc)
        # Stop tracking the process. This is logged for both outputs, so the
        # opening message always gets its closing counterpart.
        self.logfile.write_log(message2 % (time.time()-t0))
        self.logfile.write_log(message_close)
        if filepath is None:
            return doc
        return None

    def clean_stats(self, stats=None):
        '''Return the stats processed by `clean_dict_stats`.

        Parameters
        ----------
        stats : optional
            the stats to clean. If None the stats stored in `self.stats` are
            used.

        '''
        if stats is None:
            stats = self.stats
        stats = clean_dict_stats(stats)
        return stats
"""
Script to test the cleaning task.
"""

from Mscthesis.Cleanning import CleanProcess
from pythonUtils.Logger import Logger

# Path handed to the Logger which is shared with the cleaning process.
logfile = '/home/tono/mscthesis/code/Data/Outputs/Logs/log_clean.log'
logger = Logger(logfile)

# Input and output locations given to the cleaner.
inpath = '/home/tono/mscthesis/code/Data/pruebas_raw/raw1'
outpath = '/home/tono/mscthesis/code/Data/pruebas_clean2'

# Run the cleaning task.
cleaner = CleanProcess(logger)
cleaner.clean(inpath, outpath)
 def __init__(self, logfile, pathdata):
     """Initialize the object with its logger and the path of the data.

     Parameters
     ----------
     logfile : str or logger object
         a str is wrapped into a `Logger`; any other object is stored as it
         is given.
     pathdata :
         value stored unchanged in `self.pathdata`.

     """
     self._initialization()
     # isinstance also accepts subclasses of str, which an exact type
     # comparison would wrongly treat as an already-built logger.
     self.logfile = Logger(logfile) if isinstance(logfile, str) else logfile
     self.pathdata = pathdata
Beispiel #6
0
 def __init__(self, logfile, bool_inform=False):
     """Instantiation of the class remembering it is a subclass of Processer.

     Parameters
     ----------
     logfile : str or logger object
         a str is wrapped into a `Logger`; any other object is stored as it
         is given.
     bool_inform : bool, optional
         not used in the visible part of this method.
         NOTE(review): confirm whether it is consumed elsewhere.

     """
     self._initialization()
     # isinstance also accepts subclasses of str, which an exact type
     # comparison would wrongly treat as an already-built logger.
     self.logfile = Logger(logfile) if isinstance(logfile, str) else logfile