コード例 #1
0
    def test_multi_index_to_single_index(self):
        """Check that flattening a pivoted frame reproduces the expected CSV.

        Reads ``test.csv``, pivots ``value`` by ``name`` with ``var`` and
        ``sex`` as column levels, passes the pivot through
        ``multi_index_to_single_index`` and compares the result with the
        frame read from ``test_expected.csv``.
        """
        df = pd.read_csv("test.csv")
        df_cleaned = df.pivot_table(values='value',
                                    index=['name'],
                                    columns=['var', 'sex'])

        df_cleaned = multi_index_to_single_index(df_cleaned)
        df_expected = pd.read_csv("test_expected.csv")

        # assert_frame_equal raises AssertionError on any mismatch and
        # returns None otherwise, so wrapping it in assertTrue(... is None)
        # adds nothing; calling it directly reports the same failures.
        assert_frame_equal(df_cleaned, df_expected)
コード例 #2
0
# Keep only the columns this script works with.
df = df[['TL', 'REG_ID', 'Region', 'VAR', 'SEX', 'Year', 'Value']]
# First remove geos with names that we don't have mappings to dcid for.
# The mapping files are read inside `with` blocks so their handles are
# closed as soon as the JSON has been parsed.
with open('../regid2dcid.json') as regid_file:
    regid2dcid = dict(json.load(regid_file))
with open('../region_nuts_codes.json') as nuts_file:
    nuts = dict(json.load(nuts_file))
# A row is kept when its REG_ID appears in either mapping. The .copy()
# detaches the filtered frame from the original so the column assignments
# below do not trigger pandas' SettingWithCopyWarning.
df = df[df['REG_ID'].isin(nuts.keys()) |
        df['REG_ID'].isin(regid2dcid.keys())].copy()
# Second, replace the names with dcids
df['Region'] = df.apply(lambda row: generate_geo_id(row, nuts, regid2dcid),
                        axis=1)
# Wrap every year in literal double quotes.
df['Year'] = '"' + df['Year'].astype(str) + '"'

# 'TL' is not needed past this point.
df = df[['REG_ID', 'Region', 'VAR', 'SEX', 'Year', 'Value']]

# One row per (REG_ID, Region, Year); one column per (VAR, SEX) pair.
df_cleaned = df.pivot_table(values='Value',
                            index=['REG_ID', 'Region', 'Year'],
                            columns=['VAR', 'SEX'])
df_cleaned = multi_index_to_single_index(df_cleaned)

VAR_to_statsvars = {
    'TT': 'Count_Person',
    'Y0_4T': 'Count_Person_Upto4Years',
    'Y5_9T': 'Count_Person_5To9Years',
    'Y10_14T': 'Count_Person_10To14Years',
    'Y15_19T': 'Count_Person_15To19Years',
    'Y20_24T': 'Count_Person_20To24Years',
    'Y25_29T': 'Count_Person_25To29Years',
    'Y30_34T': 'Count_Person_30To34Years',
    'Y35_39T': 'Count_Person_35To39Years',
    'Y40_44T': 'Count_Person_40To44Years',
    'Y45_49T': 'Count_Person_45To49Years',
    'Y50_54T': 'Count_Person_50To54Years',
    'Y55_59T': 'Count_Person_55To59Years',
コード例 #3
0
ファイル: preprocess_csv.py プロジェクト: wh1210/data
import csv
import json
import pandas as pd

# Load the raw export and keep only the columns used below.
df = pd.read_csv('REGION_DEMOGR_pop_density.csv')
df = df[['TL', 'REG_ID', 'Region', 'VAR', 'SEX', 'Year', 'Value']]
# First remove geos with names that we don't have mappings to dcid for.
# The mapping file is read inside a `with` block so its handle is closed
# as soon as the JSON has been parsed.
with open('../regid2dcid.json') as regid_file:
    regid2dcid = dict(json.load(regid_file))
# The .copy() detaches the filtered frame from the original so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = df[df['REG_ID'].isin(regid2dcid.keys())].copy()
# Second, replace the names with dcids
# Every remaining REG_ID is a key of regid2dcid (see the filter above), so
# a vectorised map yields the same values as a row-by-row dict lookup.
df['Region'] = df['REG_ID'].map(regid2dcid)

# Wrap every year in literal double quotes.
df['Year'] = '"' + df['Year'].astype(str) + '"'
temp = df[['REG_ID', 'Region', 'VAR', 'Year', 'Value']]
# One row per (REG_ID, Region, Year); one column per VAR code.
temp_multi_index = temp.pivot_table(values='Value',
                                    index=['REG_ID', 'Region', 'Year'],
                                    columns=['VAR'])
df_cleaned = multi_index_to_single_index(temp_multi_index)[[
    'REG_ID', 'Region', 'Year', 'POP_DEN', 'SURF'
]]

# Source VAR codes -> output column names.
VAR_to_statsvars = {
    'POP_DEN': 'Count_Person_PerArea',
    'SURF': 'Area',
}

# Rebind instead of renaming in place: df_cleaned is a column selection of
# another frame, and in-place edits on such selections can raise
# SettingWithCopyWarning.
df_cleaned = df_cleaned.rename(columns=VAR_to_statsvars)
df_cleaned.to_csv('OECD_pop_density_cleaned.csv',
                  index=False,
                  quoting=csv.QUOTE_NONE)
コード例 #4
0
# Load the raw export and keep only the columns used below.
df = pd.read_csv('REGION_DEMOGR_death_tl3.csv')
df = df[['TL', 'REG_ID', 'Region', 'VAR', 'SEX', 'Year', 'Value']]
# First remove geos with names that we don't have mappings to dcid for.
# The mapping files are read inside `with` blocks so their handles are
# closed as soon as the JSON has been parsed.
with open('../regid2dcid.json') as regid_file:
    regid2dcid = dict(json.load(regid_file))
with open('../region_nuts_codes.json') as nuts_file:
    nuts = dict(json.load(nuts_file))
# A row is kept when its REG_ID appears in either mapping. The .copy()
# detaches the filtered frame from the original so the column assignments
# below do not trigger pandas' SettingWithCopyWarning.
df = df[df['REG_ID'].isin(nuts.keys()) |
        df['REG_ID'].isin(regid2dcid.keys())].copy()
# Second, replace the names with dcids
df['Region'] = df.apply(lambda row: generate_geo_id(row, nuts, regid2dcid),
                        axis=1)
# Wrap every year in literal double quotes.
df['Year'] = '"' + df['Year'].astype(str) + '"'

# 'TL' is not needed past this point.
temp = df[['REG_ID', 'Region', 'VAR', 'SEX', 'Year', 'Value']]
# One row per (REG_ID, Region, Year); one column per (VAR, SEX) pair.
temp_multi_index = temp.pivot_table(values='Value',
                                    index=['REG_ID', 'Region', 'Year'],
                                    columns=['VAR', 'SEX'])
df_cleaned = multi_index_to_single_index(temp_multi_index)

VAR_to_statsvars = {
    'D_TT': 'Count_Death',
    'D_Y0_4T': 'Count_Death_Upto4Years',
    'D_Y5_9T': 'Count_Death_5To9Years',
    'D_Y10_14T': 'Count_Death_10To14Years',
    'D_Y15_19T': 'Count_Death_15To19Years',
    'D_Y20_24T': 'Count_Death_20To24Years',
    'D_Y25_29T': 'Count_Death_25To29Years',
    'D_Y30_34T': 'Count_Death_30To34Years',
    'D_Y35_39T': 'Count_Death_35To39Years',
    'D_Y40_44T': 'Count_Death_40To44Years',
    'D_Y45_49T': 'Count_Death_45To49Years',
    'D_Y50_54T': 'Count_Death_50To54Years',
    'D_Y55_59T': 'Count_Death_55To59Years',