def header_file(self):
        """Load the template CSV and prepare the comuna skeleton.

        Reads ``self.template_name``, discards the rows that have no
        ``Comuna`` value as well as the ``Total`` rows, and turns the region
        and comuna codes into strings that carry one leading zero where
        needed (region codes below 10, comuna codes below 10000). A comuna
        code of 0, which includes codes missing from the template, is
        written as an empty string.

        Side effects:
            self.Comp: the cleaned template rows with a fresh 0..n-1 index;
                the previous index is kept in the ``index`` column.
            self.comuna: the ``Comuna`` column of ``self.Comp``.
        """

        def zero_prefixed(value, limit):
            # Blank codes stay blank; any value below ``limit`` gets a single
            # leading '0', everything else is simply converted to text.
            if value == '':
                return value
            return '0' + str(value) if value < limit else str(value)

        # for this case we use a template file and update the data
        df_base = pd.read_csv(self.template_name)
        for column in ('Codigo region', 'Codigo comuna', 'Comuna'):
            df_base[column] = df_base[column].fillna(0)

        # Rows whose Comuna was missing now hold 0 and are discarded.
        df_base.drop(df_base[df_base['Comuna'] == 0].index, inplace=True)
        df_base['Codigo region'] = df_base['Codigo region'].astype(int)
        df_base['Codigo comuna'] = df_base['Codigo comuna'].astype(int)

        # Assign the result back instead of calling where(..., inplace=True)
        # on the selected column: the in-place form acts on a temporary
        # Series and is not guaranteed to reach df_base.
        df_base['Codigo comuna'] = df_base['Codigo comuna'].where(
            df_base['Codigo comuna'] != 0, '')

        # reset_index() hands back a new frame, so the writes below never
        # target a slice of df_base.
        self.Comp = df_base.loc[df_base['Comuna'] != 'Total'].reset_index()
        utils.desconocidoName(self.Comp)

        # Whole-column conversion replaces the former row-by-row loop, which
        # wrote strings into integer columns one cell at a time.
        self.Comp['Codigo region'] = self.Comp['Codigo region'].map(
            lambda code: zero_prefixed(code, 10))
        self.Comp['Codigo comuna'] = self.Comp['Codigo comuna'].map(
            lambda code: zero_prefixed(code, 10000))

        self.comuna = self.Comp['Comuna']
    def last_to_csv(self):
        """Write comuna-by-date tables per age range and value column to CSV.

        Builds the comuna skeleton from the DEIS template CSV, normalises
        ``self.last_added`` and then, for every age range and for each of the
        columns found at positions 5, 6 and 7 of ``self.last_added``, writes
        ``self.output`` + ``_<age>_confirmadas.csv``, ``_<age>_sospechosas.csv``
        or ``_<age>_totales.csv`` together with a transposed ``_T`` file and a
        long-format ``_std`` file.

        Side effects: ``self.last_added`` is filtered, sorted, renamed, merged
        and re-indexed by ``Comuna``; intermediate values are printed.
        """
        # --- Template: one row per comuna -----------------------------------
        df_base = pd.read_csv(
            '../input/DistribucionDEIS/baseFiles/DEIS_template.csv')
        df_base['Codigo region'] = df_base['Codigo region'].fillna(0)
        df_base['Codigo comuna'] = df_base['Codigo comuna'].fillna(0)
        # NOTE(review): 'Comuna' is only filled with 0 on the following line,
        # so this lookup runs while missing names are still NaN and presumably
        # selects nothing - confirm whether the two statements are swapped.
        todrop = df_base.loc[df_base['Comuna'] == 0]
        df_base['Comuna'] = df_base['Comuna'].fillna(0)
        df_base.drop(todrop.index, inplace=True)
        df_base['Codigo region'] = df_base['Codigo region'].astype(int)
        df_base['Codigo comuna'] = df_base['Codigo comuna'].astype(int)

        # Comuna code 0 is blanked out.
        # NOTE(review): this in-place where() acts on a selected column;
        # whether the change reaches df_base depends on the pandas version
        # (it does not under Copy-on-Write) - confirm.
        desconocido = df_base['Codigo comuna'] != 0
        df_base['Codigo comuna'].where(desconocido, '', inplace=True)

        # Keep everything except the 'Total' rows; reset_index adds an
        # 'index' column holding the previous row labels.
        Comp = df_base.loc[df_base['Comuna'] != 'Total']
        Comp.reset_index(inplace=True)
        utils.desconocidoName(Comp)
        # Convert the codes to text, adding one leading '0' to region codes
        # below 10 and to comuna codes below 10000; blank codes stay blank.
        for k in range(len(Comp)):
            if Comp.loc[k, 'Codigo region'] < 10:
                Comp.loc[k, 'Codigo region'] = '0' + str(
                    Comp.loc[k, 'Codigo region'])
            else:
                Comp.loc[k, 'Codigo region'] = str(Comp.loc[k,
                                                            'Codigo region'])

            if Comp.loc[k, 'Codigo comuna'] != '':
                if Comp.loc[k, 'Codigo comuna'] < 10000:
                    Comp.loc[k, 'Codigo comuna'] = '0' + str(
                        Comp.loc[k, 'Codigo comuna'])
                else:
                    Comp.loc[k,
                             'Codigo comuna'] = str(Comp.loc[k,
                                                             'Codigo comuna'])

        comuna = Comp['Comuna']

        # --- Incoming records -----------------------------------------------
        # Records without a death date are discarded before sorting.
        self.last_added = self.last_added.dropna(subset=['Fecha defunciones'])
        self.last_added.sort_values(
            by=['region_residencia', 'comuna_residencia', 'edad'],
            inplace=True)
        self.last_added.rename(columns={'comuna_residencia': 'comuna'},
                               inplace=True)
        self.last_added.rename(columns={'region_residencia': 'Region'},
                               inplace=True)
        self.last_added = utils.normalizaNombreCodigoRegionYComuna(
            self.last_added)
        # Code -> name lookup taken from the template; blank codes become 0.
        # NOTE(review): df_sup is a column selection of Comp that is written
        # to on the next line (pandas may warn about setting on a copy).
        df_sup = Comp[['Codigo comuna', 'Comuna']]
        df_sup['Codigo comuna'] = df_sup['Codigo comuna'].replace('', 0)
        # The comuna names are re-attached from the template by the merge
        # below, so the existing 'Comuna' column is removed first.
        self.last_added.drop(columns={'Comuna'}, inplace=True)
        # Same zero-prefix rule as above so the codes match the template.
        # assumes self.last_added carries the labels 0..n-1 at this point -
        # TODO confirm against utils.normalizaNombreCodigoRegionYComuna
        for k in range(len(self.last_added)):
            if self.last_added.loc[k, 'Codigo comuna'] != '':
                if self.last_added.loc[k, 'Codigo comuna'] < 10000:
                    self.last_added.loc[k, 'Codigo comuna'] = '0' + str(
                        self.last_added.loc[k, 'Codigo comuna'])
                else:
                    self.last_added.loc[k, 'Codigo comuna'] = str(
                        self.last_added.loc[k, 'Codigo comuna'])
        self.last_added = self.last_added.merge(df_sup,
                                                on="Codigo comuna",
                                                how="left")
        self.last_added.set_index('Comuna', inplace=True)

        columns_name = self.last_added.columns.values

        # The column at position 4 supplies the date range.
        # presumably this is 'Fecha defunciones' - verify the column order
        maxSE = self.last_added[columns_name[4]].max()
        minSE = self.last_added[columns_name[4]].min()

        print(minSE, maxSE)
        # One entry per calendar day from the first to the last date,
        # formatted as YYYY-MM-DD; these become the output column names.
        lenSE = (pd.to_datetime(maxSE) - pd.to_datetime(minSE)).days + 1
        startdate = pd.to_datetime(minSE)
        date_list = pd.date_range(startdate, periods=lenSE).tolist()
        date_list = [dt.datetime.strftime(x, "%Y-%m-%d") for x in date_list]
        print(date_list)

        # vector with the variables
        # NOTE(review): this Series comes from the complete self.last_added,
        # while the counter used to read it further down walks the rows of
        # the age-filtered df_edad, so date and value may belong to different
        # records - confirm.
        SE_comuna = self.last_added[columns_name[4]]

        # --- One set of files per age range ---------------------------------
        for edad in [
                '-39', '40-49', '50-59', '60-69', '70-79', '80-89', '90-'
        ]:
            # Select the records whose 'edad' falls inside the range named
            # by the label.
            if edad == '-39':
                df_edad = self.last_added[self.last_added['edad'] <= 39]
            if edad == '40-49':
                df_edad = self.last_added[self.last_added['edad'] <= 49]
                df_edad = df_edad[df_edad['edad'] >= 40]
            if edad == '50-59':
                df_edad = self.last_added[self.last_added['edad'] <= 59]
                df_edad = df_edad[df_edad['edad'] >= 50]
            if edad == '60-69':
                df_edad = self.last_added[self.last_added['edad'] <= 69]
                df_edad = df_edad[df_edad['edad'] >= 60]
            if edad == '70-79':
                df_edad = self.last_added[self.last_added['edad'] <= 79]
                df_edad = df_edad[df_edad['edad'] >= 70]
            if edad == '80-89':
                df_edad = self.last_added[self.last_added['edad'] <= 89]
                df_edad = df_edad[df_edad['edad'] >= 80]
            if edad == '90-':
                df_edad = self.last_added[self.last_added['edad'] >= 90]

            # Positions 5, 6 and 7 select the value column; further down they
            # map to the 'confirmadas', 'sospechosas' and 'totales' files.
            for k in [5, 6, 7]:
                # Zero-filled table: one row per comuna, one column per day.
                df = pd.DataFrame(np.zeros((len(comuna), lenSE)))

                dicts = {}
                keys = range(lenSE)
                # values = [i for i in range(lenSE)]

                # Map the positional column labels to the date strings.
                for i in keys:
                    dicts[i] = date_list[i]

                df.rename(columns=dicts, inplace=True)
                value_comuna = df_edad[columns_name[k]]
                value_comuna.fillna(0, inplace=True)
                # Copy each record into the cell of its comuna row and date
                # column; a later record for the same cell overwrites the
                # earlier one.
                # NOTE(review): SE_comuna[i] / value_comuna[i] use an integer
                # on Series indexed by 'Comuna', and df[col][idx] = ... is a
                # chained assignment - both are pandas-version dependent.
                i = 0
                for row in df_edad.index:
                    idx = comuna.loc[comuna == row].index.values
                    if idx.size > 0:
                        col = SE_comuna[i]
                        df[col][idx] = value_comuna[i].astype(int)

                    i += 1

                # Template columns on the left, daily values on the right.
                df_output = pd.concat([Comp, df], axis=1)
                df_output.drop(columns=['index'], axis=1, inplace=True)

                # Length of each run of consecutive equal region codes.
                # presumably `groupby` is itertools.groupby - verify the import
                nComunas = [
                    len(list(group))
                    for key, group in groupby(df_output['Codigo region'])
                ]

                identifiers = [
                    'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
                ]
                variables = [
                    x for x in df_output.columns if x not in identifiers
                ]

                begRow = 0

                # Walk the regions, appending a 'Total' row after the rows of
                # each one.
                for i in range(len(nComunas)):

                    endRow = begRow + nComunas[i]

                    # Identifier cells of the Total row, copied from the last
                    # comuna of the region.
                    firstList = df_output[identifiers].iloc[endRow -
                                                            1].values.tolist()
                    firstList[2] = 'Total'
                    firstList[3] = ''

                    valuesTotal = df_output[variables][begRow:endRow].sum(
                        axis=0).tolist()

                    regionTotal = pd.DataFrame(
                        (firstList + valuesTotal),
                        index=df_output.columns.values).transpose()

                    # Every region except the last is followed by a blank
                    # separator row.
                    if i < len(nComunas) - 1:
                        blank_line = pd.Series(
                            np.empty((len(regionTotal), 0)).tolist())

                        regionTotal = pd.concat([regionTotal, blank_line],
                                                axis=0)
                        regionTotal.drop(columns=0, axis=1, inplace=True)

                    temp = pd.concat(
                        [df_output.iloc[begRow:endRow], regionTotal], axis=0)
                    if i == 0:
                        outputDF2 = temp
                    else:
                        outputDF2 = pd.concat([outputDF2, temp], axis=0)

                    if i < len(nComunas) - 1:
                        begRow = endRow

                    outputDF2.reset_index(inplace=True)
                    outputDF2.drop(columns=['index'], axis=1, inplace=True)
                    outputDF2[variables] = outputDF2[variables].dropna(
                    )  # .astype(int)

                    print(outputDF2.head(20))

                    # The blank separator rows and the 'Total' rows added
                    # above are removed again here, so the written files
                    # contain comuna rows only.
                    outputDF2.dropna(how='all', inplace=True)
                    todrop = outputDF2.loc[outputDF2['Comuna'] == 'Total']
                    outputDF2.drop(todrop.index, inplace=True)

                # Each value column gets three files: the wide table, its
                # transpose ('_T') and a long-format melt ('_std').
                if k == 5:
                    name = self.output + '_' + str(edad) + '_confirmadas.csv'
                    outputDF2.to_csv(name, index=False)
                    outputDF2_T = outputDF2.T
                    outputDF2_T.to_csv(name.replace('.csv', '_T.csv'),
                                       header=False)
                    identifiers = [
                        'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
                    ]
                    variables = [
                        x for x in outputDF2.columns if x not in identifiers
                    ]
                    outputDF2_std = pd.melt(outputDF2,
                                            id_vars=identifiers,
                                            value_vars=variables,
                                            var_name='Fecha',
                                            value_name='Confirmada')
                    outputDF2_std.to_csv(name.replace('.csv', '_std.csv'),
                                         index=False)

                elif k == 6:
                    name = self.output + '_' + str(edad) + '_sospechosas.csv'
                    outputDF2.to_csv(name, index=False)
                    outputDF2_T = outputDF2.T
                    outputDF2_T.to_csv(name.replace('.csv', '_T.csv'),
                                       header=False)
                    identifiers = [
                        'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
                    ]
                    variables = [
                        x for x in outputDF2.columns if x not in identifiers
                    ]
                    outputDF2_std = pd.melt(outputDF2,
                                            id_vars=identifiers,
                                            value_vars=variables,
                                            var_name='Fecha',
                                            value_name='Sospechosa')
                    outputDF2_std.to_csv(name.replace('.csv', '_std.csv'),
                                         index=False)

                elif k == 7:
                    name = self.output + '_' + str(edad) + '_totales.csv'
                    outputDF2.to_csv(name, index=False)
                    outputDF2_T = outputDF2.T
                    outputDF2_T.to_csv(name.replace('.csv', '_T.csv'),
                                       header=False)
                    identifiers = [
                        'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
                    ]
                    variables = [
                        x for x in outputDF2.columns if x not in identifiers
                    ]
                    outputDF2_std = pd.melt(outputDF2,
                                            id_vars=identifiers,
                                            value_vars=variables,
                                            var_name='Fecha',
                                            value_name='Total')
                    outputDF2_std.to_csv(name.replace('.csv', '_std.csv'),
                                         index=False)
    def last_to_csv(self):
        """Export daily per-comuna counts, split by age range, as CSV files.

        The comuna skeleton comes from the DEIS template CSV and the counts
        from ``self.last_added``. For each of the seven age ranges the
        records are collapsed to one row per comuna and death date and laid
        out as a comuna x date table tagged with an ``Edad`` column. The
        tables of all age ranges are stacked and written to ``self.output``
        + ``_confirmadas.csv``, ``_sospechosas.csv`` and ``_totales.csv``
        (fed by the columns at positions 5, 6 and 7 of ``self.last_added``),
        each with a transposed ``_T`` and a long-format ``_std`` companion.

        Side effects:
            ``self.last_added`` is replaced by its cleaned, renamed form,
            merged with the template names and indexed by ``Comuna``.
        """

        def zero_prefixed(value, limit):
            # Blank codes stay blank; any value below ``limit`` gets a single
            # leading '0', everything else is simply converted to text.
            if value == '':
                return value
            return '0' + str(value) if value < limit else str(value)

        # --- Template: one row per comuna -----------------------------------
        df_base = pd.read_csv(
            '../input/DistribucionDEIS/baseFiles/DEIS_template.csv')
        for column in ('Codigo region', 'Codigo comuna', 'Comuna'):
            df_base[column] = df_base[column].fillna(0)
        # The rows without a comuna name can only be found once their NaN has
        # been replaced by 0; searching before the fill matched nothing and
        # left the blank template rows in place.
        df_base.drop(df_base[df_base['Comuna'] == 0].index, inplace=True)
        df_base['Codigo region'] = df_base['Codigo region'].astype(int)
        df_base['Codigo comuna'] = df_base['Codigo comuna'].astype(int)

        # Comuna code 0 is blanked out. The result is assigned back because
        # where(..., inplace=True) on a selected column is not guaranteed to
        # reach df_base.
        df_base['Codigo comuna'] = df_base['Codigo comuna'].where(
            df_base['Codigo comuna'] != 0, '')

        # reset_index() returns a new frame (with the old labels in 'index'),
        # so the writes below never target a slice of df_base.
        Comp = df_base.loc[df_base['Comuna'] != 'Total'].reset_index()
        utils.desconocidoName(Comp)
        Comp['Codigo region'] = Comp['Codigo region'].map(
            lambda code: zero_prefixed(code, 10))
        Comp['Codigo comuna'] = Comp['Codigo comuna'].map(
            lambda code: zero_prefixed(code, 10000))

        comuna = Comp['Comuna']

        # --- Incoming records -----------------------------------------------
        self.last_added = self.last_added.dropna(subset=['Fecha defunciones'])
        self.last_added = self.last_added.sort_values(
            by=['region_residencia', 'comuna_residencia', 'edad'])
        self.last_added = self.last_added.rename(columns={
            'comuna_residencia': 'comuna',
            'region_residencia': 'Region',
        })
        self.last_added = utils.normalizaNombreCodigoRegionYComuna(
            self.last_added)

        # Code -> name lookup taken from the template; blank codes become 0.
        df_sup = Comp[['Codigo comuna', 'Comuna']].copy()
        df_sup['Codigo comuna'] = df_sup['Codigo comuna'].replace('', 0)

        # The names are re-attached from the template by the merge below.
        self.last_added = self.last_added.drop(columns=['Comuna'])
        # Whole-column conversion: unlike the former .loc[k] loop it does not
        # depend on the frame carrying the labels 0..n-1.
        self.last_added['Codigo comuna'] = self.last_added[
            'Codigo comuna'].map(lambda code: zero_prefixed(code, 10000))
        self.last_added = self.last_added.merge(
            df_sup, on="Codigo comuna", how="left")
        self.last_added.set_index('Comuna', inplace=True)

        columns_name = self.last_added.columns.values

        # The column at position 4 supplies the date range.
        # presumably this is 'Fecha defunciones' - verify the column order
        maxSE = self.last_added[columns_name[4]].max()
        minSE = self.last_added[columns_name[4]].min()

        # One entry per calendar day between the two, as YYYY-MM-DD strings;
        # these are the value columns of every output table.
        lenSE = (pd.to_datetime(maxSE) - pd.to_datetime(minSE)).days + 1
        startdate = pd.to_datetime(minSE)
        date_list = pd.date_range(startdate, periods=lenSE).tolist()
        date_list = [dt.datetime.strftime(x, "%Y-%m-%d") for x in date_list]
        date_position = {date: pos for pos, date in enumerate(date_list)}

        def edad2rango(df, comuna):
            # Collapse the rows of every comuna to one row per death date by
            # summing them, then stack the per-comuna results.
            # NOTE(review): which columns groupby(...).sum() keeps depends on
            # the numeric_only default of the installed pandas - confirm.
            cols = df.columns.tolist()
            df2 = pd.DataFrame(columns=cols)
            p = 0
            for row in comuna:
                aux = df.loc[df.index == row]
                aux2 = aux.groupby(['Fecha defunciones']).sum()
                aux2['Comuna'] = row
                aux2.set_index(['Comuna'], inplace=True)

                identifiers = ['Region', 'Codigo region', 'Codigo comuna',
                               'Fecha defunciones']
                temp = aux[identifiers].copy()
                temp.drop_duplicates(keep='first', inplace=True)
                temp2 = pd.concat([temp, aux2], axis=1)

                if p == 0:
                    df2 = temp2
                    p += 1
                else:
                    df2 = pd.concat([df2, temp2], axis=0)

            return df2

        # (label written to the 'Edad' column, lowest age, highest age);
        # None leaves that side of the range open.
        age_ranges = [
            ('<=39', None, 39),
            ('40-49', 40, 49),
            ('50-59', 50, 59),
            ('60-69', 60, 69),
            ('70-79', 70, 79),
            ('80-89', 80, 89),
            ('>=90', 90, None),
        ]
        # (position of the value column in self.last_added, file suffix)
        value_columns = [
            (5, '_confirmadas.csv'),
            (6, '_sospechosas.csv'),
            (7, '_totales.csv'),
        ]
        tables = {k: [] for k, _ in value_columns}

        for edad, lower, upper in age_ranges:
            df_edad = self.last_added
            if upper is not None:
                df_edad = df_edad[df_edad['edad'] <= upper]
            if lower is not None:
                df_edad = df_edad[df_edad['edad'] >= lower]
            df_edad = df_edad.drop(columns=['edad'])
            df_edad = df_edad.sort_values(by=['Fecha defunciones'])

            df_edad2 = edad2rango(df_edad, comuna)
            fechas = df_edad2['Fecha defunciones']

            for k, _ in value_columns:
                valores = df_edad2[columns_name[k]].fillna(0)

                # Fill a plain array and wrap it afterwards: the former
                # df[col][idx] = ... was a chained assignment that pandas
                # does not guarantee to write through. Comp has a 0..n-1
                # index, so row positions and row labels coincide.
                counts = np.zeros((len(comuna), lenSE))
                for j, row in enumerate(df_edad2.index):
                    rows = np.flatnonzero((comuna == row).to_numpy())
                    if rows.size > 0:
                        # .iloc makes the positional access explicit; a date
                        # outside date_list still raises KeyError.
                        col = date_position[fechas.iloc[j]]
                        counts[rows, col] = int(valores.iloc[j])
                df = pd.DataFrame(counts, columns=date_list)

                df_output = pd.concat([Comp, df], axis=1)
                df_output = df_output.drop(columns=['index'])
                df_output['Edad'] = str(edad)

                identifiers = ['Region', 'Codigo region', 'Comuna',
                               'Codigo comuna', 'Poblacion', 'Edad']
                variables = [
                    x for x in df_output.columns if x not in identifiers
                ]

                outputDF2 = df_output[identifiers + variables].copy()
                outputDF2.reset_index(drop=True, inplace=True)
                outputDF2[variables] = outputDF2[variables].dropna()
                outputDF2.dropna(how='all', inplace=True)

                tables[k].append(outputDF2)

        sort_keys = ['Region', 'Codigo region', 'Comuna', 'Codigo comuna']
        identifiers = sort_keys + ['Edad']
        for k, suffix in value_columns:
            # Stack the age ranges in the order they were produced, then
            # group the rows of each comuna together.
            table = pd.concat(tables[k], axis=0)
            table.sort_values(by=sort_keys, inplace=True)
            table.dropna(inplace=True)
            table.drop(['Poblacion'], axis=1, inplace=True)

            name = self.output + suffix
            table.to_csv(name, index=False)
            table.T.to_csv(name.replace('.csv', '_T.csv'), header=False)

            variables = [x for x in table.columns if x not in identifiers]
            table_std = pd.melt(table,
                                id_vars=identifiers,
                                value_vars=variables,
                                var_name='Fecha',
                                value_name='Total')
            table_std.to_csv(name.replace('.csv', '_std.csv'), index=False)
Exemple #4
0
    def last_to_csv(self):
        if self.indicador == 'fabricante':
            ## campana por fabricante

            self.last_added.rename(columns={'Dose': 'Dosis'}, inplace=True)
            self.last_added.rename(columns={'Type': 'Fabricante'},
                                   inplace=True)

            self.last_added["Dosis"] = self.last_added["Dosis"].replace({
                "First":
                "Primera",
                "Second":
                "Segunda"
            })

            identifiers = ['Fabricante', 'Dosis']
            variables = [
                x for x in self.last_added.columns if x not in identifiers
            ]

            self.last_added = self.last_added[identifiers + variables]
            self.last_added.to_csv(self.output + '.csv', index=False)

            df_t = self.last_added.T
            df_t.to_csv(self.output + '_t.csv', header=False)

            df_std = pd.melt(self.last_added,
                             id_vars=identifiers,
                             value_vars=variables,
                             var_name=['Fecha'],
                             value_name='Cantidad')

            df_std.to_csv(self.output + '_std.csv', index=False)

        elif self.indicador == 'campana':
            ## campana por region

            self.last_added.rename(columns={'Dose': 'Dosis'}, inplace=True)
            utils.regionName(self.last_added)

            self.last_added["Dosis"] = self.last_added["Dosis"].replace({
                "First":
                "Primera",
                "Second":
                "Segunda"
            })

            identifiers = ['Region', 'Dosis']
            variables = [
                x for x in self.last_added.columns if x not in identifiers
            ]

            self.last_added = self.last_added[identifiers + variables]
            self.last_added.to_csv(self.output + '.csv', index=False)

            df_t = self.last_added.T
            df_t.to_csv(self.output + '_t.csv', header=False)

            df_std = pd.melt(self.last_added,
                             id_vars=identifiers,
                             value_vars=variables,
                             var_name=['Fecha'],
                             value_name='Cantidad')

            df_std.to_csv(self.output + '_std.csv', index=False)

        elif self.indicador == 'edad':
            ## campana por edad

            self.last_added.rename(columns={
                'Dose': 'Dosis',
                'Age': 'Rango_etario'
            },
                                   inplace=True)

            self.last_added["Dosis"] = self.last_added["Dosis"].replace({
                "First":
                "Primera",
                "Second":
                "Segunda"
            })

            identifiers = ['Rango_etario', 'Dosis']
            variables = [
                x for x in self.last_added.columns if x not in identifiers
            ]

            self.last_added = self.last_added[identifiers + variables]
            self.last_added.to_csv(self.output + '.csv', index=False)

            df_t = self.last_added.T
            df_t.to_csv(self.output + '_t.csv', header=False)

            df_std = pd.melt(self.last_added,
                             id_vars=identifiers,
                             value_vars=variables,
                             var_name=['Fecha'],
                             value_name='Cantidad')

            df_std.to_csv(self.output + '_std.csv', index=False)

        elif self.indicador == 'caracteristicas_del_vacunado':
            ## campana por caracter del vacunado

            self.last_added.rename(columns={
                'Dose': 'Dosis',
                'Group': 'Grupo'
            },
                                   inplace=True)

            self.last_added["Dosis"] = self.last_added["Dosis"].replace({
                "First":
                "Primera",
                "Second":
                "Segunda"
            })

            identifiers = ['Grupo', 'Dosis']
            variables = [
                x for x in self.last_added.columns if x not in identifiers
            ]

            self.last_added = self.last_added[identifiers + variables]
            self.last_added.to_csv(self.output + '.csv', index=False)

            df_t = self.last_added.T
            df_t.to_csv(self.output + '_t.csv', header=False)

            df_std = pd.melt(self.last_added,
                             id_vars=identifiers,
                             value_vars=variables,
                             var_name=['Fecha'],
                             value_name='Cantidad')

            df_std.to_csv(self.output + '_std.csv', index=False)

        elif self.indicador == 'vacunas_region':
            self.last_added.rename(columns={
                'REGION_CORTO': 'Region',
                'COD_COMUNA_FINAL': 'Comuna',
                'FECHA_INMUNIZACION': 'Fecha',
                'SUM_of_SUM_of_2aDOSIS': 'Segunda_comuna',
                'SUM_of_SUM_of_1aDOSIS': 'Primera_comuna'
            },
                                   inplace=True)
            self.last_added = self.last_added.dropna(subset=['Fecha'])
            self.last_added['Fecha'] = pd.to_datetime(
                self.last_added['Fecha'],
                format='%d/%m/%Y').dt.strftime("%Y-%m-%d")
            self.last_added.sort_values(by=['Region', 'Fecha'], inplace=True)
            utils.regionName(self.last_added)
            regiones = pd.DataFrame(self.last_added['Region'].unique())

            #transformar
            ## agrupar por comuna
            self.last_added['Primera'] = self.last_added.groupby(
                ['Region', 'Fecha'])['Primera_comuna'].transform('sum')
            self.last_added['Segunda'] = self.last_added.groupby(
                ['Region', 'Fecha'])['Segunda_comuna'].transform('sum')
            self.last_added = self.last_added[[
                'Region', 'Fecha', 'Primera', 'Segunda'
            ]]
            self.last_added.drop_duplicates(inplace=True)

            ##llenar fechas para cada region y crear total
            idx = pd.date_range(self.last_added['Fecha'].min(),
                                self.last_added['Fecha'].max())
            df = pd.DataFrame()
            total = pd.DataFrame(
                columns=['Region', 'Fecha', 'Primera', 'Segunda'])
            total = utils.fill_in_missing_dates(total, 'Fecha', 0, idx)
            total["Region"] = total["Region"].replace({0: 'Total'})
            for region in regiones[0]:
                df_region = self.last_added.loc[self.last_added['Region'] ==
                                                region]
                df_region = utils.fill_in_missing_dates(
                    df_region, 'Fecha', 0, idx)
                df_region["Region"] = df_region["Region"].replace({0: region})
                total['Primera'] = df_region['Primera'] + total['Primera']
                total['Segunda'] = df_region['Segunda'] + total['Segunda']
                df = df.append(df_region, ignore_index=True)
            total = total.append(df, ignore_index=True)
            total['Fecha'] = total['Fecha'].dt.strftime("%Y-%m-%d")
            self.last_added = total

            ##sumar totales
            self.last_added['Primera'] = pd.to_numeric(
                self.last_added['Primera'])
            self.last_added['Segunda'] = pd.to_numeric(
                self.last_added['Segunda'])
            self.last_added['Primera'] = self.last_added.groupby(
                ['Region'])['Primera'].transform('cumsum')
            self.last_added['Segunda'] = self.last_added.groupby(
                ['Region'])['Segunda'].transform('cumsum')
            #self.last_added['Total'] = self.last_added.sum(numeric_only=True, axis=1)

            ##transformar en input
            df = pd.DataFrame()
            regiones = pd.DataFrame(self.last_added['Region'].unique())
            for region in regiones[0]:
                df_region = self.last_added.loc[self.last_added['Region'] ==
                                                region]
                df_region.set_index('Fecha', inplace=True)
                df_region = df_region[['Primera', 'Segunda']].T
                df_region.reset_index(drop=True, inplace=True)
                df = df.append(df_region, ignore_index=True)

            new_col = [
                'Primera', 'Segunda', 'Primera', 'Segunda', 'Primera',
                'Segunda', 'Primera', 'Segunda', 'Primera', 'Segunda',
                'Primera', 'Segunda', 'Primera', 'Segunda', 'Primera',
                'Segunda', 'Primera', 'Segunda', 'Primera', 'Segunda',
                'Primera', 'Segunda', 'Primera', 'Segunda', 'Primera',
                'Segunda', 'Primera', 'Segunda', 'Primera', 'Segunda',
                'Primera', 'Segunda', 'Primera', 'Segunda'
            ]
            df.insert(0, column='Dosis', value=new_col)
            new_col = pd.DataFrame()
            for region in regiones[0]:
                col = [region, region]
                new_col = new_col.append(col, ignore_index=True)
            df.insert(0, column='Region', value=new_col)
            self.last_added = df

            identifiers = ['Region', 'Dosis']
            variables = [
                x for x in self.last_added.columns if x not in identifiers
            ]

            self.last_added = self.last_added[identifiers + variables]
            self.last_added.to_csv(self.output + '.csv', index=False)

            df_t = self.last_added.T
            df_t.to_csv(self.output + '_t.csv', header=False)

            df_std = pd.melt(self.last_added,
                             id_vars=identifiers,
                             value_vars=variables,
                             var_name=['Fecha'],
                             value_name='Cantidad')

            df_std.to_csv(self.output + '_std.csv', index=False)
            df_std.to_json(self.output + '.json',
                           orient='values',
                           force_ascii=False)

        elif self.indicador == 'vacunas_edad_region':
            self.last_added.rename(columns={
                'NOMBRE_REGION': 'Region',
                'COD_COMUNA': 'Comuna',
                'EDAD_ANOS': 'Edad',
                'POBLACION': 'Poblacion',
                '2aDOSIS_RES': 'Segunda_comuna',
                '1aDOSIS_RES': 'Primera_comuna'
            },
                                   inplace=True)
            self.last_added.sort_values(by=['Region', 'Edad'], inplace=True)
            utils.regionName(self.last_added)
            regiones = pd.DataFrame(self.last_added['Region'].unique())

            # transformar
            ## agrupar por comuna
            self.last_added['Primera'] = self.last_added.groupby(
                ['Region', 'Edad'])['Primera_comuna'].transform('sum')
            self.last_added['Segunda'] = self.last_added.groupby(
                ['Region', 'Edad'])['Segunda_comuna'].transform('sum')
            self.last_added['Poblacion'] = self.last_added.groupby(
                ['Region', 'Edad'])['Poblacion'].transform('sum')
            self.last_added = self.last_added[[
                'Region', 'Edad', 'Poblacion', 'Primera', 'Segunda'
            ]]
            self.last_added.drop_duplicates(inplace=True)

            ##crear total
            df = pd.DataFrame()
            total = pd.DataFrame(
                columns=['Region', 'Edad', 'Poblacion', 'Primera', 'Segunda'])
            total['Edad'] = list(range(15, 81))
            total["Region"] = total["Region"].fillna('Total')
            for region in regiones[0]:
                df_region = self.last_added.loc[self.last_added['Region'] ==
                                                region]
                df_region.reset_index(drop=True, inplace=True)
                total['Primera'] = total.Primera.fillna(
                    0) + df_region.Primera.fillna(0)
                total['Segunda'] = total.Segunda.fillna(
                    0) + df_region.Segunda.fillna(0)
                total['Poblacion'] = total.Poblacion.fillna(
                    0) + df_region.Poblacion.fillna(0)
                df = df.append(df_region, ignore_index=True)
            edad = total
            total = total.append(df, ignore_index=True)
            self.last_added = total

            ##transformar en input
            df = pd.DataFrame()
            regiones = pd.DataFrame(self.last_added['Region'].unique())
            for region in regiones[0]:
                df_region = self.last_added.loc[self.last_added['Region'] ==
                                                region]
                df_region.set_index('Edad', inplace=True)
                df_region = df_region[['Primera', 'Segunda']].T
                df_region.reset_index(drop=True, inplace=True)
                df = df.append(df_region, ignore_index=True)

            new_col = [
                'Primera', 'Segunda', 'Primera', 'Segunda', 'Primera',
                'Segunda', 'Primera', 'Segunda', 'Primera', 'Segunda',
                'Primera', 'Segunda', 'Primera', 'Segunda', 'Primera',
                'Segunda', 'Primera', 'Segunda', 'Primera', 'Segunda',
                'Primera', 'Segunda', 'Primera', 'Segunda', 'Primera',
                'Segunda', 'Primera', 'Segunda', 'Primera', 'Segunda',
                'Primera', 'Segunda', 'Primera', 'Segunda', 'Primera',
                'Segunda'
            ]
            df.insert(0, column='Dosis', value=new_col)
            new_col = pd.DataFrame()
            for region in regiones[0]:
                col = [region, region]
                new_col = new_col.append(col, ignore_index=True)
            df.insert(0, column='Region', value=new_col)
            self.last_added = df

            identifiers = ['Region', 'Dosis']
            variables = [
                x for x in self.last_added.columns if x not in identifiers
            ]

            self.last_added = self.last_added[identifiers + variables]
            self.last_added.to_csv(self.output + '.csv', index=False)

            df_t = self.last_added.T
            df_t.to_csv(self.output + '_t.csv', header=False)

            df_std = pd.melt(self.last_added,
                             id_vars=identifiers,
                             value_vars=variables,
                             var_name=['Edad'],
                             value_name='Cantidad')

            df_std.to_csv(self.output + '_std.csv', index=False)
            df_std.to_json(self.output + '.json',
                           orient='values',
                           force_ascii=False)

        elif self.indicador == 'vacunas_edad_sexo':
            self.last_added.rename(columns={
                'NOMBRE_REGION': 'Region',
                'SEXO': 'Sexo',
                'EDAD_ANOS': 'Edad',
                'POBLACION': 'Poblacion',
                'SUM_of_1aDOSIS': 'Primera',
                'SUM_of_2aDOSIS': 'Segunda'
            },
                                   inplace=True)
            self.last_added.sort_values(by=['Sexo', 'Edad'], inplace=True)
            self.last_added = self.last_added[[
                'Sexo', 'Edad', 'Primera', 'Segunda'
            ]]
            sexo = pd.DataFrame(self.last_added['Sexo'].unique())

            ##crear total
            df = pd.DataFrame()
            for sex in sexo[0]:
                total = pd.DataFrame(
                    columns=['Sexo', 'Edad', 'Primera', 'Segunda'])
                total['Edad'] = list(
                    range(self.last_added.Edad.min(),
                          self.last_added.Edad.max() + 1))
                df_sex = self.last_added.loc[self.last_added['Sexo'] == sex]
                df_sex.reset_index(drop=True, inplace=True)
                df_sex.index = df_sex['Edad']
                total.index = total['Edad']
                total['Sexo'] = total.Sexo.fillna(sex)
                total['Primera'] = total.Primera.fillna(
                    0) + df_sex.Primera.fillna(0)
                total['Segunda'] = total.Segunda.fillna(
                    0) + df_sex.Segunda.fillna(0)
                df = df.append(total, ignore_index=True)
            self.last_added = df

            ##transformar en input
            df = pd.DataFrame()
            sexo = pd.DataFrame(self.last_added['Sexo'].unique())
            for sex in sexo[0]:
                df_sex = self.last_added.loc[self.last_added['Sexo'] == sex]
                df_sex.set_index('Edad', inplace=True)
                df_sex = df_sex[['Primera', 'Segunda']].T
                df_sex.reset_index(drop=True, inplace=True)
                df = df.append(df_sex, ignore_index=True)

            new_col = [
                'Primera', 'Segunda', 'Primera', 'Segunda', 'Primera',
                'Segunda', 'Primera', 'Segunda'
            ]
            df.insert(0, column='Dosis', value=new_col)
            new_col = pd.DataFrame()
            for sex in sexo[0]:
                col = [sex, sex]
                new_col = new_col.append(col, ignore_index=True)
            df.insert(0, column='Sexo', value=new_col)
            self.last_added = df

            identifiers = ['Sexo', 'Dosis']
            variables = [
                x for x in self.last_added.columns if x not in identifiers
            ]

            self.last_added = self.last_added[identifiers + variables]
            self.last_added.to_csv(self.output + '.csv', index=False)

            df_t = self.last_added.T
            df_t.to_csv(self.output + '_t.csv', header=False)

            df_std = pd.melt(self.last_added,
                             id_vars=identifiers,
                             value_vars=variables,
                             var_name=['Edad'],
                             value_name='Cantidad')

            df_std.to_csv(self.output + '_std.csv', index=False)
            df_std.to_json(self.output + '.json',
                           orient='values',
                           force_ascii=False)

        elif self.indicador == 'vacunas_prioridad':
            self.last_added.rename(columns={
                'Criterio': 'Grupo',
                'Subcriterio': 'Subgrupo',
                '1aDOSIS': 'Primera',
                '2aDOSIS': 'Segunda'
            },
                                   inplace=True)
            self.last_added.sort_values(by=['Grupo', 'Subgrupo'], inplace=True)
            self.last_added = self.last_added[[
                'Grupo', 'Subgrupo', 'Primera', 'Segunda'
            ]]

            ##transformar en input
            df = pd.DataFrame()
            grupos = pd.DataFrame(self.last_added['Grupo'].unique())
            for grupo in grupos[0]:
                df_grupo = self.last_added.loc[self.last_added['Grupo'] ==
                                               grupo]
                df_grupo.set_index('Subgrupo', inplace=True)
                df_grupo = df_grupo[['Primera', 'Segunda']].T
                df_grupo.reset_index(drop=True, inplace=True)
                df = df.append(df_grupo, ignore_index=True)

            new_col = [
                'Primera', 'Segunda', 'Primera', 'Segunda', 'Primera',
                'Segunda', 'Primera', 'Segunda', 'Primera', 'Segunda',
                'Primera', 'Segunda', 'Primera', 'Segunda'
            ]
            df.insert(0, column='Dosis', value=new_col)
            new_col = pd.DataFrame()
            for grupo in grupos[0]:
                col = [grupo, grupo]
                new_col = new_col.append(col, ignore_index=True)
            df.insert(0, column='Grupo', value=new_col)
            self.last_added = df

            identifiers = ['Grupo', 'Dosis']
            variables = [
                x for x in self.last_added.columns if x not in identifiers
            ]

            self.last_added = self.last_added[identifiers + variables]
            self.last_added.to_csv(self.output + '.csv', index=False)

            df_t = self.last_added.T
            df_t.to_csv(self.output + '_t.csv', header=False)

            df_std = pd.melt(self.last_added,
                             id_vars=identifiers,
                             value_vars=variables,
                             var_name=['Subgrupo'],
                             value_name='Cantidad')

            df_std.to_csv(self.output + '_std.csv', index=False)
            df_std.to_json(self.output + '.json',
                           orient='values',
                           force_ascii=False)

        elif self.indicador == 'vacunas_comuna':
            ##template por comuna
            df_base = pd.read_csv(
                '../input/DistribucionDEIS/baseFiles/DEIS_template.csv')
            df_base['Codigo region'] = df_base['Codigo region'].fillna(0)
            df_base['Codigo comuna'] = df_base['Codigo comuna'].fillna(0)
            df_base['Comuna'] = df_base['Comuna'].fillna(0)
            todrop = df_base.loc[df_base['Comuna'] == 0]
            df_base.drop(todrop.index, inplace=True)
            df_base['Codigo region'] = df_base['Codigo region'].astype(int)
            df_base['Codigo comuna'] = df_base['Codigo comuna'].astype(int)

            desconocido = df_base['Codigo comuna'] != 0
            df_base['Codigo comuna'].where(desconocido, '', inplace=True)

            Comp = df_base.loc[df_base['Comuna'] != 'Total']
            Comp.reset_index(inplace=True)
            utils.desconocidoName(Comp)
            for k in range(len(Comp)):
                if Comp.loc[k, 'Codigo region'] < 10:
                    Comp.loc[k, 'Codigo region'] = '0' + str(
                        Comp.loc[k, 'Codigo region'])
                else:
                    Comp.loc[k,
                             'Codigo region'] = str(Comp.loc[k,
                                                             'Codigo region'])

                if Comp.loc[k, 'Codigo comuna'] != '':
                    if Comp.loc[k, 'Codigo comuna'] < 10000:
                        Comp.loc[k, 'Codigo comuna'] = '0' + str(
                            Comp.loc[k, 'Codigo comuna'])
                    else:
                        Comp.loc[k, 'Codigo comuna'] = str(
                            Comp.loc[k, 'Codigo comuna'])

            comuna = Comp['Comuna']

            self.last_added.rename(columns={
                'REGION_CORTO': 'region_residencia',
                'COD_COMUNA_FINAL': 'Codigo comuna',
                'FECHA_INMUNIZACION': 'Fecha',
                'SUM_of_SUM_of_2aDOSIS': 'Segunda_comuna',
                'SUM_of_SUM_of_1aDOSIS': 'Primera_comuna'
            },
                                   inplace=True)
            self.last_added = self.last_added.dropna(subset=['Fecha'])
            self.last_added['Fecha'] = pd.to_datetime(
                self.last_added['Fecha'],
                format='%d/%m/%Y').dt.strftime("%Y-%m-%d")
            self.last_added.sort_values(by=['region_residencia', 'Fecha'],
                                        inplace=True)
            utils.regionDEISName(self.last_added)

            for k in range(len(self.last_added)):
                if self.last_added.loc[k, 'Codigo comuna'] != '':
                    if self.last_added.loc[k, 'Codigo comuna'] < 10000:
                        self.last_added.loc[k, 'Codigo comuna'] = '0' + str(
                            self.last_added.loc[k, 'Codigo comuna'])
                    else:
                        self.last_added.loc[k, 'Codigo comuna'] = str(
                            self.last_added.loc[k, 'Codigo comuna'])

            df_sup = Comp[['Codigo comuna', 'Comuna']]
            df_sup['Codigo comuna'] = df_sup['Codigo comuna'].replace('', 0)
            self.last_added = self.last_added.merge(df_sup,
                                                    on="Codigo comuna",
                                                    how="left")
            self.last_added.set_index('Comuna', inplace=True)

            columns_name = self.last_added.columns.values

            maxSE = self.last_added[columns_name[2]].max()
            minSE = self.last_added[columns_name[2]].min()

            print(minSE, maxSE)
            lenSE = (pd.to_datetime(maxSE) - pd.to_datetime(minSE)).days + 1
            startdate = pd.to_datetime(minSE)
            date_list = pd.date_range(startdate, periods=lenSE).tolist()
            date_list = [
                dt.datetime.strftime(x, "%Y-%m-%d") for x in date_list
            ]
            print(date_list)

            SE_comuna = self.last_added[columns_name[2]]

            for k in [3, 4]:
                df = pd.DataFrame(np.zeros((len(comuna), lenSE)))

                dicts = {}
                keys = range(lenSE)
                # values = [i for i in range(lenSE)]

                for i in keys:
                    dicts[i] = date_list[i]

                df.rename(columns=dicts, inplace=True)
                value_comuna = self.last_added[columns_name[k]]
                value_comuna.fillna(0, inplace=True)
                i = 0
                for row in self.last_added.index:
                    idx = comuna.loc[comuna == row].index.values
                    if idx.size > 0:
                        col = SE_comuna[i]
                        df[col][idx] = value_comuna[i].astype(int)

                    i += 1

                df_output = pd.concat([Comp, df], axis=1)
                df_output.drop(columns=['index'], axis=1, inplace=True)

                nComunas = [
                    len(list(group))
                    for key, group in groupby(df_output['Codigo region'])
                ]

                identifiers = [
                    'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
                ]
                variables = [
                    x for x in df_output.columns if x not in identifiers
                ]

                begRow = 0

                for i in range(len(nComunas)):

                    endRow = begRow + nComunas[i]

                    firstList = df_output[identifiers].iloc[endRow -
                                                            1].values.tolist()
                    firstList[2] = 'Total'
                    firstList[3] = ''

                    valuesTotal = df_output[variables][begRow:endRow].sum(
                        axis=0).tolist()

                    regionTotal = pd.DataFrame(
                        (firstList + valuesTotal),
                        index=df_output.columns.values).transpose()

                    if i < len(nComunas) - 1:
                        blank_line = pd.Series(
                            np.empty((len(regionTotal), 0)).tolist())

                        regionTotal = pd.concat([regionTotal, blank_line],
                                                axis=0)
                        regionTotal.drop(columns=0, axis=1, inplace=True)

                    temp = pd.concat(
                        [df_output.iloc[begRow:endRow], regionTotal], axis=0)
                    if i == 0:
                        outputDF2 = temp
                    else:
                        outputDF2 = pd.concat([outputDF2, temp], axis=0)

                    if i < len(nComunas) - 1:
                        begRow = endRow

                outputDF2.reset_index(inplace=True)
                outputDF2.drop(columns=['index'], axis=1, inplace=True)
                outputDF2[variables] = outputDF2[variables].dropna(
                )  # .astype(int)

                print(outputDF2.head(20))

                outputDF2.dropna(how='all', inplace=True)
                todrop = outputDF2.loc[outputDF2['Comuna'] == 'Total']
                outputDF2.drop(todrop.index, inplace=True)

                if k == 3:
                    name = self.output + '_1eraDosis.csv'
                    outputDF2.to_csv(name, index=False)
                    outputDF2_T = outputDF2.T
                    outputDF2_T.to_csv(name.replace('.csv', '_T.csv'),
                                       header=False)
                    identifiers = [
                        'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
                    ]
                    outputDF2.drop(columns=['Poblacion'], inplace=True)
                    variables = [
                        x for x in outputDF2.columns if x not in identifiers
                    ]
                    outputDF2_std = pd.melt(outputDF2,
                                            id_vars=identifiers,
                                            value_vars=variables,
                                            var_name='Fecha',
                                            value_name='Primera Dosis')
                    outputDF2_std.to_csv(name.replace('.csv', '_std.csv'),
                                         index=False)
                elif k == 4:
                    name = self.output + '_2daDosis.csv'
                    outputDF2.to_csv(name, index=False)
                    outputDF2_T = outputDF2.T
                    outputDF2_T.to_csv(name.replace('.csv', '_T.csv'),
                                       header=False)
                    identifiers = [
                        'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
                    ]
                    outputDF2.drop(columns=['Poblacion'], inplace=True)
                    variables = [
                        x for x in outputDF2.columns if x not in identifiers
                    ]
                    outputDF2_std = pd.melt(outputDF2,
                                            id_vars=identifiers,
                                            value_vars=variables,
                                            var_name='Fecha',
                                            value_name='Segunda Dosis')
                    outputDF2_std.to_csv(name.replace('.csv', '_std.csv'),
                                         index=False)

        elif self.indicador == 'vacunas_comuna_edad':
            ##template por comuna
            df_base = pd.read_csv(
                '../input/DistribucionDEIS/baseFiles/DEIS_template.csv')
            df_base['Codigo region'] = df_base['Codigo region'].fillna(0)
            df_base['Codigo comuna'] = df_base['Codigo comuna'].fillna(0)
            df_base['Comuna'] = df_base['Comuna'].fillna(0)
            todrop = df_base.loc[df_base['Comuna'] == 0]
            df_base.drop(todrop.index, inplace=True)
            df_base['Codigo region'] = df_base['Codigo region'].astype(int)
            df_base['Codigo comuna'] = df_base['Codigo comuna'].astype(int)

            desconocido = df_base['Codigo comuna'] != 0
            df_base['Codigo comuna'].where(desconocido, '', inplace=True)

            Comp = df_base.loc[df_base['Comuna'] != 'Total']
            Comp.reset_index(inplace=True)
            utils.desconocidoName(Comp)
            for k in range(len(Comp)):
                if Comp.loc[k, 'Codigo region'] < 10:
                    Comp.loc[k, 'Codigo region'] = '0' + str(
                        Comp.loc[k, 'Codigo region'])
                else:
                    Comp.loc[k,
                             'Codigo region'] = str(Comp.loc[k,
                                                             'Codigo region'])

                if Comp.loc[k, 'Codigo comuna'] != '':
                    if Comp.loc[k, 'Codigo comuna'] < 10000:
                        Comp.loc[k, 'Codigo comuna'] = '0' + str(
                            Comp.loc[k, 'Codigo comuna'])
                    else:
                        Comp.loc[k, 'Codigo comuna'] = str(
                            Comp.loc[k, 'Codigo comuna'])

            comuna = Comp['Comuna']

            self.last_added.rename(columns={
                'NOMBRE_REGION': 'region_residencia',
                'COD_COMUNA': 'Codigo comuna',
                'EDAD_ANOS': 'Edad',
                'SUM_of_SUM_of_2aDOSIS': 'Segunda_comuna',
                'SUM_of_SUM_of_1aDOSIS': 'Primera_comuna'
            },
                                   inplace=True)
            utils.regionDEISName(self.last_added)

            for k in range(len(self.last_added)):
                if self.last_added.loc[k, 'Codigo comuna'] != '':
                    if self.last_added.loc[k, 'Codigo comuna'] < 10000:
                        self.last_added.loc[k, 'Codigo comuna'] = '0' + str(
                            self.last_added.loc[k, 'Codigo comuna'])
                    else:
                        self.last_added.loc[k, 'Codigo comuna'] = str(
                            self.last_added.loc[k, 'Codigo comuna'])

            df_sup = Comp[['Codigo comuna', 'Comuna']]
            df_sup['Codigo comuna'] = df_sup['Codigo comuna'].replace('', 0)
            self.last_added = self.last_added.merge(df_sup,
                                                    on="Codigo comuna",
                                                    how="left")
            self.last_added.set_index('Comuna', inplace=True)

            columns_name = self.last_added.columns.values

            maxSE = self.last_added[columns_name[2]].max()
            minSE = self.last_added[columns_name[2]].min()

            print(minSE, maxSE)
            lenSE = maxSE - minSE + 1
            date_list = list(range(minSE, maxSE + 1))
            print(date_list)

            SE_comuna = self.last_added[columns_name[2]]

            for k in [4, 5]:
                df = pd.DataFrame(np.zeros((len(comuna), lenSE)))

                dicts = {}
                keys = range(lenSE)
                # values = [i for i in range(lenSE)]

                for i in keys:
                    dicts[i] = date_list[i]

                df.rename(columns=dicts, inplace=True)
                value_comuna = self.last_added[columns_name[k]]
                value_comuna.fillna(0, inplace=True)
                i = 0
                for row in self.last_added.index:
                    idx = comuna.loc[comuna == row].index.values
                    if idx.size > 0:
                        col = SE_comuna[i]
                        df[col][idx] = value_comuna[i].astype(int)

                    i += 1

                df_output = pd.concat([Comp, df], axis=1)
                df_output.drop(columns=['index'], axis=1, inplace=True)

                nComunas = [
                    len(list(group))
                    for key, group in groupby(df_output['Codigo region'])
                ]

                identifiers = [
                    'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
                ]
                variables = [
                    x for x in df_output.columns if x not in identifiers
                ]

                begRow = 0

                for i in range(len(nComunas)):

                    endRow = begRow + nComunas[i]

                    firstList = df_output[identifiers].iloc[endRow -
                                                            1].values.tolist()
                    firstList[2] = 'Total'
                    firstList[3] = ''

                    valuesTotal = df_output[variables][begRow:endRow].sum(
                        axis=0).tolist()

                    regionTotal = pd.DataFrame(
                        (firstList + valuesTotal),
                        index=df_output.columns.values).transpose()

                    if i < len(nComunas) - 1:
                        blank_line = pd.Series(
                            np.empty((len(regionTotal), 0)).tolist())

                        regionTotal = pd.concat([regionTotal, blank_line],
                                                axis=0)
                        regionTotal.drop(columns=0, axis=1, inplace=True)

                    temp = pd.concat(
                        [df_output.iloc[begRow:endRow], regionTotal], axis=0)
                    if i == 0:
                        outputDF2 = temp
                    else:
                        outputDF2 = pd.concat([outputDF2, temp], axis=0)

                    if i < len(nComunas) - 1:
                        begRow = endRow

                outputDF2.reset_index(inplace=True)
                outputDF2.drop(columns=['index'], axis=1, inplace=True)
                outputDF2[variables] = outputDF2[variables].dropna(
                )  # .astype(int)

                print(outputDF2.head(20))

                outputDF2.dropna(how='all', inplace=True)
                todrop = outputDF2.loc[outputDF2['Comuna'] == 'Total']
                outputDF2.drop(todrop.index, inplace=True)

                if k == 4:
                    name = self.output + '_1eraDosis.csv'
                    outputDF2.to_csv(name, index=False)
                    outputDF2_T = outputDF2.T
                    outputDF2_T.to_csv(name.replace('.csv', '_T.csv'),
                                       header=False)
                    identifiers = [
                        'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
                    ]
                    outputDF2.drop(columns=['Poblacion'], inplace=True)
                    variables = [
                        x for x in outputDF2.columns if x not in identifiers
                    ]
                    outputDF2_std = pd.melt(outputDF2,
                                            id_vars=identifiers,
                                            value_vars=variables,
                                            var_name='Edad',
                                            value_name='Primera Dosis')
                    outputDF2_std.to_csv(name.replace('.csv', '_std.csv'),
                                         index=False)
                elif k == 5:
                    name = self.output + '_2daDosis.csv'
                    outputDF2.to_csv(name, index=False)
                    outputDF2_T = outputDF2.T
                    outputDF2_T.to_csv(name.replace('.csv', '_T.csv'),
                                       header=False)
                    identifiers = [
                        'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
                    ]
                    outputDF2.drop(columns=['Poblacion'], inplace=True)
                    variables = [
                        x for x in outputDF2.columns if x not in identifiers
                    ]
                    outputDF2_std = pd.melt(outputDF2,
                                            id_vars=identifiers,
                                            value_vars=variables,
                                            var_name='Edad',
                                            value_name='Segunda Dosis')
                    outputDF2_std.to_csv(name.replace('.csv', '_std.csv'),
                                         index=False)
# Example #5
# 0
    def last_to_csv(self):
        if self.indicador == 'vacunas_comuna_edad':
            ##template por comuna
            df_base = pd.read_csv(
                '../input/DistribucionDEIS/baseFiles/DEIS_template.csv')
            df_base['Codigo region'] = df_base['Codigo region'].fillna(0)
            df_base['Codigo comuna'] = df_base['Codigo comuna'].fillna(0)
            df_base['Comuna'] = df_base['Comuna'].fillna(0)
            todrop = df_base.loc[df_base['Comuna'] == 0]
            df_base.drop(todrop.index, inplace=True)
            df_base['Codigo region'] = df_base['Codigo region'].astype(int)
            df_base['Codigo comuna'] = df_base['Codigo comuna'].astype(int)

            desconocido = df_base['Codigo comuna'] != 0
            df_base['Codigo comuna'].where(desconocido, '', inplace=True)

            Comp = df_base.loc[df_base['Comuna'] != 'Total']
            Comp.reset_index(inplace=True)
            utils.desconocidoName(Comp)
            for k in range(len(Comp)):
                if Comp.loc[k, 'Codigo region'] < 10:
                    Comp.loc[k, 'Codigo region'] = '0' + str(
                        Comp.loc[k, 'Codigo region'])
                else:
                    Comp.loc[k,
                             'Codigo region'] = str(Comp.loc[k,
                                                             'Codigo region'])

                if Comp.loc[k, 'Codigo comuna'] != '':
                    if Comp.loc[k, 'Codigo comuna'] < 10000:
                        Comp.loc[k, 'Codigo comuna'] = '0' + str(
                            Comp.loc[k, 'Codigo comuna'])
                    else:
                        Comp.loc[k, 'Codigo comuna'] = str(
                            Comp.loc[k, 'Codigo comuna'])

            comuna = Comp['Comuna']

            self.last_added.rename(columns={
                'NOMBRE_REGION': 'region_residencia',
                'COD_COMUNA': 'Codigo comuna',
                'POBLACION': 'poblacion',
                'EDAD_ANOS': 'Edad',
                'SUM_of_SUM_of_2aDOSIS': 'Segunda_comuna',
                'SUM_of_SUM_of_1aDOSIS': 'Primera_comuna'
            },
                                   inplace=True)
            utils.regionDEISName(self.last_added)

            for k in range(len(self.last_added)):
                if self.last_added.loc[k, 'Codigo comuna'] != '':
                    if self.last_added.loc[k, 'Codigo comuna'] < 10000:
                        self.last_added.loc[k, 'Codigo comuna'] = '0' + str(
                            self.last_added.loc[k, 'Codigo comuna'])
                    else:
                        self.last_added.loc[k, 'Codigo comuna'] = str(
                            self.last_added.loc[k, 'Codigo comuna'])

            df_sup = Comp[['Codigo comuna', 'Comuna']]
            df_sup['Codigo comuna'] = df_sup['Codigo comuna'].replace('', 0)
            self.last_added = self.last_added.merge(df_sup,
                                                    on="Codigo comuna",
                                                    how="left")
            self.last_added.set_index('Comuna', inplace=True)

            columns_name = self.last_added.columns.values

            maxSE = self.last_added[columns_name[3]].max()
            minSE = self.last_added[columns_name[3]].min()

            print(minSE, maxSE)
            lenSE = maxSE - minSE + 1
            date_list = list(range(minSE, maxSE + 1))
            print(date_list)

            SE_comuna = self.last_added[columns_name[3]]

            k = 4
            df = pd.DataFrame(np.zeros((len(comuna), lenSE)))

            dicts = {}
            keys = range(lenSE)
            # values = [i for i in range(lenSE)]

            for i in keys:
                dicts[i] = date_list[i]

            df.rename(columns=dicts, inplace=True)
            value_comuna = self.last_added[columns_name[k]]
            value_comuna.fillna(0, inplace=True)
            i = 0
            for row in self.last_added.index:
                idx = comuna.loc[comuna == row].index.values
                if idx.size > 0:
                    col = SE_comuna[i]
                    df[col][idx] = value_comuna[i].astype(int)

                i += 1

            df_output = pd.concat([Comp, df], axis=1)
            df_output.drop(columns=['index'], axis=1, inplace=True)

            nComunas = [
                len(list(group))
                for key, group in groupby(df_output['Codigo region'])
            ]

            identifiers = [
                'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
            ]
            variables = [x for x in df_output.columns if x not in identifiers]

            begRow = 0

            for i in range(len(nComunas)):

                endRow = begRow + nComunas[i]

                firstList = df_output[identifiers].iloc[endRow -
                                                        1].values.tolist()
                firstList[2] = 'Total'
                firstList[3] = ''

                valuesTotal = df_output[variables][begRow:endRow].sum(
                    axis=0).tolist()

                regionTotal = pd.DataFrame(
                    (firstList + valuesTotal),
                    index=df_output.columns.values).transpose()

                if i < len(nComunas) - 1:
                    blank_line = pd.Series(
                        np.empty((len(regionTotal), 0)).tolist())

                    regionTotal = pd.concat([regionTotal, blank_line], axis=0)
                    regionTotal.drop(columns=0, axis=1, inplace=True)

                temp = pd.concat([df_output.iloc[begRow:endRow], regionTotal],
                                 axis=0)
                if i == 0:
                    outputDF2 = temp
                else:
                    outputDF2 = pd.concat([outputDF2, temp], axis=0)

                if i < len(nComunas) - 1:
                    begRow = endRow

            outputDF2.reset_index(inplace=True)
            outputDF2.drop(columns=['index'], axis=1, inplace=True)
            outputDF2[variables] = outputDF2[variables].dropna(
            )  # .astype(int)

            print(outputDF2.head(20))

            outputDF2.dropna(how='all', inplace=True)
            todrop = outputDF2.loc[outputDF2['Comuna'] == 'Total']
            outputDF2.drop(todrop.index, inplace=True)

            name = self.output + '.csv'
            outputDF2.to_csv(name, index=False)
            outputDF2_T = outputDF2.T
            outputDF2_T.to_csv(name.replace('.csv', '_T.csv'), header=False)
            identifiers = [
                'Region', 'Codigo region', 'Comuna', 'Codigo comuna'
            ]
            outputDF2.drop(columns=['Poblacion'], inplace=True)
            variables = [x for x in outputDF2.columns if x not in identifiers]
            outputDF2_std = pd.melt(outputDF2,
                                    id_vars=identifiers,
                                    value_vars=variables,
                                    var_name='Edad',
                                    value_name='Poblacion')
            outputDF2_std.to_csv(name.replace('.csv', '_std.csv'), index=False)