import pandas as pd
from class_utilities import Utilities as ut
from class_plot_functions import ScatterPlot as sc
from class_gather_data import DataHandler as dh
#%matplotlib notebook

# Windows paths below are raw strings: in a normal literal, sequences such as
# "\E", "\D", "\P" and "\S" are invalid escapes (SyntaxWarning on Python 3.12+).
# The resulting path text is unchanged.

#load file on poverty
#https://data.oecd.org/inequality/poverty-rate.html
file_1 = r"D:\ECE 143\Data files for Project\Poverty.csv"
pv_raw = dh.gather_data_from_csv(file_1)
pv_raw = ut.remove_spaces(pv_raw)
# keep only the rows whose SUBJECT column equals "TOT"
pv_c = pv_raw[(pv_raw.SUBJECT == "TOT")]
pv_piv = ut.pivot(pv_c,
                  index_new='LOCATION',
                  columns_new='TIME',
                  values_new='Value')

#load file on social expenditure
#https://data.oecd.org/inequality/poverty-rate.html
# NOTE(review): the URL above is identical to the poverty source link; confirm
# the intended source for the social expenditure data.
file_2 = r"D:\ECE 143\Data files for Project\Social Expenditure.csv"
soc_raw = dh.gather_data_from_csv(file_2)
soc_raw = ut.remove_spaces(soc_raw)
# keep only rows matching all four filters: public source, total branch,
# total type of expenditure, measured as a percentage of GDP
soc_c = soc_raw[(soc_raw.Source == "Public")
                & (soc_raw.Branch == "Total")
                & (soc_raw.Type_of_Expenditure == "Total")
                & (soc_raw.Measure == "In percentage of Gross Domestic Product")]
soc_piv = ut.pivot(soc_c,
                   index_new='COUNTRY',
                   columns_new='Year',
                   values_new='Value')
from class_gather_data import DataHandler as dh from class_plot_functions import BokehLine as bl #load_data file_1="D:\ECE 143\Data files for Project\Govt exp imf 2.xls" df = dh.gather_data_from_Excel(file_1, sh_name='exp') df.rename(columns={'Government expenditure, percent of GDP (% of GDP)':'Country_name'}, inplace=True) df.drop([0],inplace=True) df=df.T new_header = df.iloc[0] df = df[1:] df.columns = new_header df.index.names=['Year'] df.columns.names=['Countries'] df.drop(list(range(1800,1880)),inplace=True) df.drop(df.iloc[:, -2::], inplace = True, axis = 1) df1=df[['Argentina','Australia','Germany','India','Italy','Japan','United States', 'United Kingdom']].copy() #pick initial country country='United States' col=df1.columns.get_loc(country) file_name="line.html" bok_line=bl(df1, country, title='Government Expenditure', xlabel='Year', ylabel='Government Expenditure (% of GDP)') fig=bok_line.make_line() fig=bok_line.add_hover(fig,tooltips=[ ('Year',"$x{0f}"),
import os
from class_gather_data import DataHandler as dh
from class_plot_functions import Choropleth as choro
#%matplotlib inline

# path of the GeoJSON file that contains the world map
world_geo = os.path.join(
    r'C:\Users\shrey\A_Repository_for_the_Final_Project_for_ECE_143',
    'countries.geo.json',
)

# load the data: sheet 'poptotal_ppp' of the workbook, indexed by 'country'
file_1 = "D:/ECE 143/Data files for Project/001_Main SPEED Dataset 2015.xls"
df = dh.gather_data_from_Excel(
    file_1,
    sh_name='poptotal_ppp',
    index='country',
)

# build the Choropleth object from the 'ISO' and 2009 columns of the data
m = choro(
    location=[30, 15],
    zoom_start=2.4,
    geo_data=world_geo,
    data=df,
    columns=['ISO', 2009],
    key_on='feature.id',
    legend_name='Government Expenditure per capita(US$), 2009',
    fill_color="YlGn",
)

# create the map, then attach three (coordinates, label) markers to it
mapp = m.make_map()
mapp = m.add_marker(
    mapp,
    [
        ([49.463803, 6.18632], "Luxembourg $30.2k"),
        ([41.850033, -87.6500523], "USA $10.45k"),
        ([35, 104], "China $1.6k"),
    ],
)

# write the map to an HTML file and show it
m.save(mapp, "my_plot.html")
# NOTE(review): display is not imported here; presumably the notebook built-in
display(mapp)
import pandas as pd
from class_utilities import Utilities as ut
from class_plot_functions import ScatterPlot as sc
from class_gather_data import DataHandler as dh
#%matplotlib notebook

# Windows paths below are raw strings: in a normal literal, sequences such as
# "\E", "\D" and "\c" are invalid escapes (SyntaxWarning on Python 3.12+).
# The resulting path text is unchanged.
file_1 = r"D:\ECE 143\Data files for Project\Education Expenditure.xls"
file_2 = r"D:\ECE 143\Data files for Project\EDULIT_DS_31052019025144633.csv"

# education expenditure: sheet 'Data', header=3, indexed by 'Country Code'
ed_raw = dh.gather_data_from_Excel(file_1,
                                   sh_name='Data',
                                   header=3,
                                   index='Country Code')
# no reshaping is applied; ed_piv is another name for the same object
ed_piv = ed_raw

lit_raw = dh.gather_data_from_csv(file_2)
lit_raw = ut.remove_spaces(lit_raw)
cont_raw = dh.cont_cou_map(
    r"D:\ECE 143\Data files for Project\country-and-continent-codes-list.csv")

#Youth literacy rate
# keep only the rows for the youth literacy rate indicator
lit_c = lit_raw[(
    lit_raw.Indicator ==
    "Youth literacy rate, population 15-24 years, both sexes (%)")]
lit_piv = ut.pivot(lit_c,
                   index_new='LOCATION',
                   columns_new='TIME',
                   values_new='Value')
# remove these seven location codes from the pivoted table, in place
lit_piv.drop(['TUR', 'SVN', 'BRA', 'MDV', 'MEX', 'ARG', 'COL'], inplace=True)

#plot
import numpy as np from class_utilities import Utilities as ut from class_plot_functions import ScatterPlot as sc from class_gather_data import DataHandler as dh #%matplotlib notebook #download from https://www.kaggle.com/unsdsn/world-happiness#2017.csv #download from https://data.worldbank.org/indicator/ny.gdp.pcap.cd file_1 = "D:\ECE 143\Data files for Project\Happiness Report 2017.csv" file_2 = "D:\ECE 143\Data files for Project\GDPpercapita.xls" hap_raw = dh.gather_data_from_csv(file_1, index='Country') gdp_cp_raw = dh.gather_data_from_Excel(file_2, sh_name='Data', index='Country Name') hap = hap_raw['Happiness.Rank'].dropna() hap = hap.drop('South Sudan') gdp_cp = gdp_cp_raw[['Country Code', '2017']].copy() gdp_cp.replace("nan", np.nan, inplace=True) gdp_cp.dropna(inplace=True) hap1, gdp_cp1 = ut.trim(hap, gdp_cp) hap_df = hap1.to_frame() merged = gdp_cp1.merge(hap_df, left_index=True, right_index=True, how='inner') merged.set_index('Country Code', inplace=True) merged.drop([ 'XKX', 'NER', 'AFG', 'LSO', 'KEN', 'GIN', 'GHA', 'KEN', 'MMR', 'PRY', 'ZMB', 'BLR', 'SEN', 'ARM', 'LBR', 'TJK', 'CAF', 'BDI', 'TZA', 'RWA', 'TGO', 'ZWE', 'MLI', 'BEN', 'BFA', 'TCD', 'BTN', 'MDG', 'AUS'
from class_gather_data import DataHandler as dh
from class_utilities import Utilities as ut
from class_plot_functions import MultiBar as mb

#load health expenditure data
hl_raw = dh.gather_data_from_csv(
    r"D:\ECE 143\Data files for Project\UN data health exp.csv",
    index='Country or Area')
hl_piv = ut.pivot(hl_raw,
                  index_new='Country or Area',
                  columns_new='Year(s)',
                  values_new='Value')
year_hl = 2010
hl_col_df = ut.pipeline(hl_piv, year_hl, "Health Expenditure")

#load education expenditure data
# Raw string, consistent with the health path above: "\E" and "\D" are invalid
# escapes in a normal literal (SyntaxWarning on Python 3.12+). The resulting
# path text is unchanged.
ed_raw = dh.gather_data_from_csv(
    r"D:\ECE 143\Data files for Project\Education expenditure UNESCO.csv",
    index='Country')
# keep only the rows for the share-of-government-expenditure indicator
ed_c = ed_raw[(
    ed_raw.Indicator ==
    "Expenditure on education as a percentage of total government expenditure (%)"
)]
ed_piv = ut.pivot(ed_c,
                  index_new='Country',
                  columns_new='Time',
                  values_new='Value')
year_ed = 2013
ed_col_df = ut.pipeline(ed_piv, year_ed, "Education Expenditure")

#load social expenditure data