import pandas as pd
from class_utilities import Utilities as ut
from class_plot_functions import ScatterPlot as sc
from class_gather_data import DataHandler as dh
#%matplotlib notebook

# Load the poverty-rate data.
# Source: https://data.oecd.org/inequality/poverty-rate.html
# Raw string: the Windows path contains backslashes, and "\E", "\D" and "\P"
# are invalid escape sequences in a regular string literal (SyntaxWarning on
# recent Python versions). The resulting path text is unchanged.
file_1 = r"D:\ECE 143\Data files for Project\Poverty.csv"

pv_raw = dh.gather_data_from_csv(file_1)
pv_raw = ut.remove_spaces(pv_raw)
# Keep only the rows whose SUBJECT column equals "TOT".
pv_c = pv_raw[pv_raw.SUBJECT == "TOT"]
# Pivot with LOCATION as the index, TIME as the columns and Value as the
# cell values (as named by the keyword arguments of ut.pivot).
pv_piv = ut.pivot(pv_c,
                  index_new='LOCATION',
                  columns_new='TIME',
                  values_new='Value')

# Load the social expenditure data.
# https://data.oecd.org/inequality/poverty-rate.html
# NOTE(review): the URL above points at the poverty-rate page, not at a
# social-expenditure page -- likely a copy-paste; confirm the real source.
# Raw string: the Windows path contains backslashes, and "\E", "\D" and "\S"
# are invalid escape sequences in a regular string literal (SyntaxWarning on
# recent Python versions). The resulting path text is unchanged.
file_2 = r"D:\ECE 143\Data files for Project\Social Expenditure.csv"
soc_raw = dh.gather_data_from_csv(file_2)
soc_raw = ut.remove_spaces(soc_raw)
# Keep only rows for total public expenditure measured as a share of GDP;
# all four column filters must hold at once.
soc_c = soc_raw[(soc_raw.Source == "Public") & (soc_raw.Branch == "Total") &
                (soc_raw.Type_of_Expenditure == "Total") &
                (soc_raw.Measure == "In percentage of Gross Domestic Product")]
# Pivot with COUNTRY as the index, Year as the columns and Value as the
# cell values (as named by the keyword arguments of ut.pivot).
soc_piv = ut.pivot(soc_c,
                   index_new='COUNTRY',
                   columns_new='Year',
                   values_new='Value')
Beispiel #2
0
from class_gather_data import DataHandler as dh
from class_plot_functions import BokehLine as bl

# Load the government-expenditure workbook (sheet 'exp').
# Raw string: the Windows path contains backslashes, and "\E", "\D" and "\G"
# are invalid escape sequences in a regular string literal (SyntaxWarning on
# recent Python versions). The resulting path text is unchanged.
file_1 = r"D:\ECE 143\Data files for Project\Govt exp imf 2.xls"
df = dh.gather_data_from_Excel(file_1, sh_name='exp')

# Give the first column a short name and discard the row labelled 0.
df.rename(
    columns={
        'Government expenditure, percent of GDP (% of GDP)': 'Country_name'
    },
    inplace=True)
df.drop([0], inplace=True)

# Transpose, then promote the first row of the transposed frame to be the
# column header and remove it from the data.
df = df.T
new_header = df.iloc[0]
df = df[1:]
df.columns = new_header
df.index.names = ['Year']
df.columns.names = ['Countries']

# Drop the index labels 1800..1879.
df.drop(list(range(1800, 1880)), inplace=True)
# Drop the last two columns by label. The labels are taken from df.columns
# directly instead of passing a DataFrame slice as the label list.
df.drop(columns=df.columns[-2:], inplace=True)

# Work on an independent copy restricted to the countries of interest.
df1 = df[['Argentina', 'Australia', 'Germany', 'India', 'Italy', 'Japan',
          'United States', 'United Kingdom']].copy()

# Pick the initial country and look up its column position.
country = 'United States'
col = df1.columns.get_loc(country)
file_name = "line.html"

bok_line = bl(df1,
              country,
              title='Government Expenditure',
              xlabel='Year',
              ylabel='Government Expenditure (% of GDP)')
fig = bok_line.make_line()
fig=bok_line.add_hover(fig,tooltips=[
                            ('Year',"$x{0f}"),
import os
from class_gather_data import DataHandler as dh
from class_plot_functions import Choropleth as choro
#%matplotlib inline

# Location of the GeoJSON file that contains the map.
geo_dir = r'C:\Users\shrey\A_Repository_for_the_Final_Project_for_ECE_143'
world_geo = os.path.join(geo_dir, 'countries.geo.json')

# Load the data sheet, indexed by the 'country' column.
file_1 = "D:/ECE 143/Data files for Project/001_Main SPEED Dataset 2015.xls"
df = dh.gather_data_from_Excel(file_1, sh_name='poptotal_ppp', index='country')

# Settings for the Choropleth object, collected in one place and passed as
# keyword arguments.
choro_options = {
    'location': [30, 15],
    'zoom_start': 2.4,
    'geo_data': world_geo,
    'data': df,
    'columns': ['ISO', 2009],
    'key_on': 'feature.id',
    'legend_name': 'Government Expenditure per capita(US$), 2009',
    'fill_color': "YlGn",
}
m = choro(**choro_options)

# Each marker is a (coordinates, label) pair; the coordinates are presumably
# [latitude, longitude] -- confirm against Choropleth.add_marker.
markers = [
    ([49.463803, 6.18632], "Luxembourg $30.2k"),
    ([41.850033, -87.6500523], "USA $10.45k"),
    ([35, 104], "China $1.6k"),
]

mapp = m.make_map()
mapp = m.add_marker(mapp, markers)
m.save(mapp, "my_plot.html")
# NOTE(review): `display` is not imported in this snippet; presumably it is
# supplied by the notebook environment -- confirm before running as a script.
display(mapp)
import pandas as pd
from class_utilities import Utilities as ut
from class_plot_functions import ScatterPlot as sc
from class_gather_data import DataHandler as dh
#%matplotlib notebook

# Input files. Raw strings are used because the Windows paths contain
# backslashes, and sequences such as "\E", "\D" and "\c" are invalid escapes
# in a regular string literal (SyntaxWarning on recent Python versions).
# The resulting path text is unchanged.
file_1 = r"D:\ECE 143\Data files for Project\Education Expenditure.xls"
file_2 = r"D:\ECE 143\Data files for Project\EDULIT_DS_31052019025144633.csv"

# Education expenditure: read sheet 'Data' with header=3, indexed by
# 'Country Code'.
ed_raw = dh.gather_data_from_Excel(file_1,
                                   sh_name='Data',
                                   header=3,
                                   index='Country Code')
# No pivot is applied here; ed_piv is the same object as ed_raw.
ed_piv = ed_raw

# Literacy data.
lit_raw = dh.gather_data_from_csv(file_2)
lit_raw = ut.remove_spaces(lit_raw)

# Country/continent code list.
cont_raw = dh.cont_cou_map(
    r"D:\ECE 143\Data files for Project\country-and-continent-codes-list.csv")

# Youth literacy rate: keep only the rows for this indicator.
lit_c = lit_raw[(
    lit_raw.Indicator ==
    "Youth literacy rate, population 15-24 years, both sexes (%)")]
# Pivot with LOCATION as the index, TIME as the columns and Value as the
# cell values (as named by the keyword arguments of ut.pivot).
lit_piv = ut.pivot(lit_c,
                   index_new='LOCATION',
                   columns_new='TIME',
                   values_new='Value')
# Remove these location codes from the pivoted table.
lit_piv.drop(['TUR', 'SVN', 'BRA', 'MDV', 'MEX', 'ARG', 'COL'], inplace=True)

#plot
Beispiel #5
0
import numpy as np
from class_utilities import Utilities as ut
from class_plot_functions import ScatterPlot as sc
from class_gather_data import DataHandler as dh
#%matplotlib notebook

# Data sources:
#   https://www.kaggle.com/unsdsn/world-happiness#2017.csv
#   https://data.worldbank.org/indicator/ny.gdp.pcap.cd
# Raw strings are used because the Windows paths contain backslashes, and
# sequences such as "\E", "\D", "\H" and "\G" are invalid escapes in a
# regular string literal (SyntaxWarning on recent Python versions).
# The resulting path text is unchanged.
file_1 = r"D:\ECE 143\Data files for Project\Happiness Report 2017.csv"
file_2 = r"D:\ECE 143\Data files for Project\GDPpercapita.xls"

hap_raw = dh.gather_data_from_csv(file_1, index='Country')
gdp_cp_raw = dh.gather_data_from_Excel(file_2,
                                       sh_name='Data',
                                       index='Country Name')

# Happiness rank per country, without missing values and without the
# 'South Sudan' entry.
hap = hap_raw['Happiness.Rank'].dropna()
hap = hap.drop('South Sudan')

# GDP per capita for 2017 together with the country code. The literal text
# "nan" is converted to a real NaN so that dropna() removes those rows too.
gdp_cp = gdp_cp_raw[['Country Code', '2017']].copy()
gdp_cp.replace("nan", np.nan, inplace=True)
gdp_cp.dropna(inplace=True)

# NOTE(review): ut.trim presumably aligns the two inputs on their shared
# index labels -- confirm against Utilities.trim.
hap1, gdp_cp1 = ut.trim(hap, gdp_cp)
hap_df = hap1.to_frame()
# Inner join on the index of both frames, then re-index by country code.
merged = gdp_cp1.merge(hap_df, left_index=True, right_index=True, how='inner')
merged.set_index('Country Code', inplace=True)
merged.drop([
    'XKX', 'NER', 'AFG', 'LSO', 'KEN', 'GIN', 'GHA', 'KEN', 'MMR', 'PRY',
    'ZMB', 'BLR', 'SEN', 'ARM', 'LBR', 'TJK', 'CAF', 'BDI', 'TZA', 'RWA',
    'TGO', 'ZWE', 'MLI', 'BEN', 'BFA', 'TCD', 'BTN', 'MDG', 'AUS'
from class_gather_data import DataHandler as dh
from class_utilities import Utilities as ut
from class_plot_functions import MultiBar as mb

# Health expenditure: read the CSV indexed by 'Country or Area'.
health_csv = r"D:\ECE 143\Data files for Project\UN data health exp.csv"
hl_raw = dh.gather_data_from_csv(health_csv, index='Country or Area')

# Pivot with 'Country or Area' as the index, 'Year(s)' as the columns and
# 'Value' as the cell values (as named by the keyword arguments of ut.pivot).
hl_piv = ut.pivot(
    hl_raw,
    index_new='Country or Area',
    columns_new='Year(s)',
    values_new='Value',
)

# Year passed to ut.pipeline along with the "Health Expenditure" label.
year_hl = 2010
hl_col_df = ut.pipeline(hl_piv, year_hl, "Health Expenditure")

# Load education expenditure data, indexed by 'Country'.
# Raw string: the Windows path contains backslashes, and "\E" and "\D" are
# invalid escape sequences in a regular string literal (SyntaxWarning on
# recent Python versions). The resulting path text is unchanged.
ed_raw = dh.gather_data_from_csv(
    r"D:\ECE 143\Data files for Project\Education expenditure UNESCO.csv",
    index='Country')
# Keep only the rows for the share-of-government-expenditure indicator.
ed_c = ed_raw[(
    ed_raw.Indicator ==
    "Expenditure on education as a percentage of total government expenditure (%)"
)]
# Pivot with 'Country' as the index, 'Time' as the columns and 'Value' as
# the cell values (as named by the keyword arguments of ut.pivot).
ed_piv = ut.pivot(ed_c,
                  index_new='Country',
                  columns_new='Time',
                  values_new='Value')
# Year passed to ut.pipeline along with the "Education Expenditure" label.
year_ed = 2013
ed_col_df = ut.pipeline(ed_piv, year_ed, "Education Expenditure")

#load social expenditure data