def __init__(self, fileinstructions, study_info=None, logfile=None):
    """Initialization of the stats computation.

    Parameters
    ----------
    fileinstructions : str
        Location of the instructions file; it is handed to
        ``parse_instructions_file`` and kept in ``self.fileinstructions``.
    study_info : dict, optional
        Information about the study. When omitted, a fresh empty dict is
        created for this instance; when given, the caller's dict is stored
        as-is (not copied).
    logfile : optional
        Value forwarded unchanged to ``Logger``.
    """
    describ_info = parse_instructions_file(fileinstructions)
    self.fileinstructions = fileinstructions
    self.info = describ_info
    # No statistics exist until they are explicitly computed.
    self.stats = None
    # A ``{}`` default in the signature would be one single dict shared by
    # every instance built without ``study_info``; build it per instance.
    self.study_info = {} if study_info is None else study_info
    self.logfile = Logger(logfile)
def __init__(self, logfile, pathfolder, precomputers=None, num_cores=None):
    """Set up folders, worker count, logger and precomputers.

    Parameters
    ----------
    logfile : str or logger object
        A string is wrapped in ``Logger``; any other object is stored
        unchanged and used as the logger.
    pathfolder : str
        Base folder. ``self.pathdata`` is set to its ``Cleaned/Results``
        subfolder.
    precomputers : optional
        Precomputers to use. When None, a ``PrecomputerCollection`` is
        built from ``logfile`` and ``pathfolder`` with
        ``old_computed=True``.
    num_cores : int, optional
        Number of cores to use. None means ``multiprocessing.cpu_count()``;
        0 is mapped to 1; any other value is stored as given.
    """
    self._initialization()
    self._initialization_spec()

    self.pathfolder = pathfolder
    self.pathdata = os.path.join(pathfolder, 'Cleaned/Results')

    if num_cores is None:
        self.num_cores = multiprocessing.cpu_count()
    elif num_cores == 0:
        # Zero workers is not usable; fall back to a single core.
        self.num_cores = 1
    else:
        self.num_cores = num_cores

    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of str as a path to log to.
    self.logfile = Logger(logfile) if isinstance(logfile, str) else logfile

    if precomputers is not None:
        self.precomputers = precomputers
    else:
        # The original ``logfile`` argument (not self.logfile) is forwarded.
        self.precomputers = PrecomputerCollection(logfile, pathfolder,
                                                  old_computed=True)
class Statistics:
    """The object which performs the computation of the statistics.

    The instructions file is parsed at construction time into ``self.info``.
    ``compute_stats`` fills ``self.stats`` and
    ``self.study_info['global_stats']``; ``to_latex`` renders them through
    ``describe2latex``.

    TODO
    ----
    - Check if the variables in the info are in the dataframe and act in
      consequence.
    - Create plots function to create from stats the plots.
    - Transform this class as a Processer.
    """

    def __init__(self, fileinstructions, study_info=None, logfile=None):
        """Initialization of the stats computation.

        Parameters
        ----------
        fileinstructions : str
            Location of the instructions file, parsed with
            ``parse_instructions_file``.
        study_info : dict, optional
            Information about the study. When omitted, a fresh empty dict
            is created for this instance; when given, the caller's dict is
            stored as-is (not copied) and later receives the
            ``'global_stats'`` entry.
        logfile : optional
            Value forwarded unchanged to ``Logger``.
        """
        describ_info = parse_instructions_file(fileinstructions)
        self.fileinstructions = fileinstructions
        self.info = describ_info
        self.stats = None
        # compute_stats writes into this dict, so a ``{}`` default in the
        # signature would leak 'global_stats' between unrelated instances.
        self.study_info = {} if study_info is None else study_info
        self.logfile = Logger(logfile)

    @staticmethod
    def _null_counts(dataframe):
        """Return per-column counts of non-null and null values.

        The result is indexed by the columns of ``dataframe`` and has the
        two columns ``'non-null'`` and ``'null'``.
        """
        non_null = dataframe.notnull().sum()
        # Passing the two Series as a dict makes them columns; a list of
        # Series would be interpreted as rows instead.
        return pd.DataFrame({'non-null': non_null,
                             'null': len(dataframe) - non_null},
                            columns=['non-null', 'null'])

    def compute_stats(self, dataframe, info=None):
        """Compute the statistics for all the variables described in info.

        Parameters
        ----------
        dataframe : pd.DataFrame
            The data to describe.
        info : optional
            Replacement for ``self.info``. When given it is stored on the
            instance and used for this and later calls.

        Returns
        -------
        stats : list
            One result of the module-level ``compute_stats`` per row of
            ``self.info``, in row order. Also stored in ``self.stats``.
        """
        ## 0. Prepare inputs
        if info is not None:
            self.info = info
        # Tracking process with logfile
        t00 = time.time()
        self.logfile.write_log(message0 % self.fileinstructions)
        self.logfile.write_log(message1)

        ## 1. Compute stats
        stats = []
        # Rows are read with positional ``iloc``, so iterate positions and
        # not index labels (labels only coincide for a default 0..n-1 index).
        for i in range(len(self.info)):
            info_var = dict(self.info.iloc[i])
            # Tracking process with logfile
            t0 = time.time()
            self.logfile.write_log(message1a % info_var['variables'])
            # Computing stats of the i-th variable. Inside the method body
            # this name resolves to the module-level function.
            stats.append(compute_stats(dataframe, info_var))
            # Stop tracking the process
            self.logfile.write_log(message2 % (time.time()-t0))

        ## 2. Save and return
        self.stats = stats
        # TODO: Order by column order!!!
        self.study_info['global_stats'] = self._null_counts(dataframe)
        # Stop tracking the process
        self.logfile.write_log(message3 % (time.time()-t00))
        self.logfile.write_log(message_close)
        return stats

    def to_latex(self, filepath=None):
        """Render the study information and stats with ``describe2latex``.

        Parameters
        ----------
        filepath : str, optional
            Destination file. When None nothing is written and the
            rendered document is returned instead.

        Returns
        -------
        doc or None
            The output of ``describe2latex`` when ``filepath`` is None,
            otherwise None.
        """
        ## Tracking the process
        t0 = time.time()
        self.logfile.write_log(message1b)

        ## 1. Compute transformation
        doc = describe2latex(self.study_info, self.stats)

        ## 2. Write output
        if filepath is not None:
            # The context manager closes the file; no explicit close needed.
            with open(filepath, 'w') as myfile:
                myfile.write(doc)

        # Stop tracking the process (logged for both outcomes).
        self.logfile.write_log(message2 % (time.time()-t0))
        self.logfile.write_log(message_close)
        return doc if filepath is None else None

    def clean_stats(self, stats=None):
        """Return ``clean_dict_stats`` applied to stats.

        ``self.stats`` is used when ``stats`` is None. The stored
        ``self.stats`` attribute is not reassigned here.
        """
        if stats is None:
            stats = self.stats
        return clean_dict_stats(stats)
"""
Script to test the cleaning task.

Builds a CleanProcess around a file Logger and calls its ``clean`` method
with the input and output paths defined below.
"""

from Mscthesis.Cleanning import CleanProcess
from pythonUtils.Logger import Logger

# Log destination for this run.
logfile = '/home/tono/mscthesis/code/Data/Outputs/Logs/log_clean.log'
# Paths passed to ``clean`` as (inpath, outpath).
inpath = '/home/tono/mscthesis/code/Data/pruebas_raw/raw1'
outpath = '/home/tono/mscthesis/code/Data/pruebas_clean2'

# Create the logger-backed cleaner and run it in a single expression.
CleanProcess(Logger(logfile)).clean(inpath, outpath)
def __init__(self, logfile, pathdata):
    """Store the logger and the data path.

    Parameters
    ----------
    logfile : str or logger object
        A string is wrapped in ``Logger``; any other object is stored
        unchanged and used as the logger.
    pathdata : str
        Stored in ``self.pathdata`` without modification.
    """
    self._initialization()
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of str as a path to log to.
    self.logfile = Logger(logfile) if isinstance(logfile, str) else logfile
    self.pathdata = pathdata
def __init__(self, logfile, bool_inform=False):
    """Instantiation of the class remembering it is a subclass of Processer.

    Parameters
    ----------
    logfile : str or logger object
        A string is wrapped in ``Logger``; any other object is stored
        unchanged and used as the logger.
    bool_inform : bool, optional
        Accepted for interface compatibility; this constructor does not
        read it.
    """
    self._initialization()
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of str as a path to log to.
    self.logfile = Logger(logfile) if isinstance(logfile, str) else logfile