Beispiel #1
0
    def test_xlsx(self):
        """Should massively read XLSX files from a directory.

        Should read XLSX files in a directory  and generate a dict
        with file and sheet names as keys and dataframes with the data
        as values.

        """
        dir_path = self.base_path + '/excel/'
        data = extractor.xlsx(dir_path, sep=None, encoding=None)

        self.assertEqual(len(data['prueba_excel.xlsx']), 4)
        self.assertEqual(len(data['excel_prueba.xlsx']), 4)
        self.assertEqual(type(data['prueba_excel.xlsx']['Hoja1']),
                         pd.core.frame.DataFrame)
        self.assertEqual(type(data['prueba_excel.xlsx']['Hoja3']),
                         pd.core.frame.DataFrame)
        self.assertEqual(type(data['excel_prueba.xlsx']['Hoja1']),
                         pd.core.frame.DataFrame)
        self.assertEqual(type(data['excel_prueba.xlsx']['Hoja2']),
                         pd.core.frame.DataFrame)
Beispiel #2
0
    f.truncate()
    # re-write file and close
    f.write(text)
    f.close()


def round_avoiding_errors(value, decimals):
    try:
        _ = round(value, decimals)
        return _
    except:
        return value


# Read  input files
data = xlsx(cfg.path.input)

# Value and trend files for each indicator
for key in cfg.series:

    # Drop NA rows, if any
    data[cfg.file][cfg.series[key].sheet].dropna(axis=0,
                                                 how='all',
                                                 inplace=True)

    # Rename variables

    if key not in ['empresas_afectadas_erte', 'afiliados_afectados_erte']:
        if cfg.series[key].trend_vars == []:
            data[cfg.file][cfg.series[key].sheet].rename(columns={
                cfg.series[key].value_vars[0]:
Beispiel #3
0
from etlstat.extractor import extractor
from cfg import cfg
from pyjstat import pyjstat
import pandas as pd

cases = extractor.xlsx(cfg.input.path)
municipios = extractor.csv(cfg.input.path, sep=',')
cases_df = cases['cruce qlik.xlsx']['cruce_qlik']
municipios_df = municipios['municipios.csv']

cases_df.fillna(value=cfg.fillna, inplace=True)
cases_df.replace(to_replace=cfg.replace.codpostal.old,
                 value=cfg.replace.codpostal.new,
                 inplace=True)
cases_df['codpostal'] = cases_df['codpostal'].astype(int)
cases_df['codpostal'] = cases_df['codpostal'].astype(object)

# TODO: extract constants to config - maybe a dict
cases_df['localidad'].replace('SANTANDER - P',
                              'SANTANDER (SANTANDER)',
                              inplace=True)
cases_df['localidad'].replace('ENTRAMBASAGUAS - P',
                              'ENTRAMBASAGUAS (ENTRAMBASAGUAS)',
                              inplace=True)
cases_df['localidad'].replace('SANTIURDE DE TORANZO - P',
                              'SANTIURDE DE TORANZO (SANTIURDE DE TORANZO)',
                              inplace=True)
cases_df['localidad'].replace('BARCENA DE CICERO - P',
                              'BARCENA DE CICERO (BARCENA DE CICERO)',
                              inplace=True)
cases_df['localidad'].replace('LAREDO - P', 'LAREDO (LAREDO)', inplace=True)