def merge_by_year(year):
    '''
    The required function for question 5.

    Combine the table returned by loadCountries() with the income figures
    of a single year taken from the table returned by transformIncome().

    year: label of the row to select from the income table. A label that
        is not present makes the .loc lookup raise KeyError.

    Returns the joined DataFrame, in which the column holding the selected
    year's figures is named 'Income'.
    '''
    income_table = transformIncome()
    country_table = loadCountries()

    # The double brackets keep the selection a one-row DataFrame, so the
    # transpose yields one row per original column and a single column
    # labelled `year`.
    by_country = income_table.loc[[year]].T
    by_country.index.name = 'Country'

    # Right join: every row of the per-year income frame is kept, matched
    # against the 'Country' column of the country table.
    joined = country_table.join(by_country, on='Country', how='right')
    return joined.rename(columns={year: 'Income'})
def checkYear(input):
    '''
    Check if the input year exists in the data.

    input: the candidate year as a string (e.g. raw user input). The name
        shadows the builtin but is kept so keyword callers keep working.

    Returns True only when `input` is a string consisting solely of decimal
    digits and its integer value is one of the index labels of the table
    returned by transformIncome(); returns False in every other case.
    '''
    # str.isdecimal() instead of str.isdigit(): isdigit() also accepts
    # characters such as superscript digits ('²') that int() cannot parse,
    # which made the conversion raise ValueError instead of reporting an
    # invalid year. Non-string input is likewise reported as invalid.
    if not isinstance(input, str) or not input.isdecimal():
        return False

    # Only load the table once the input is known to be a plain number.
    income = transformIncome()
    return int(input) in income.index
def plotIncome(input):
    '''
    Given a valid input, plot the distribution of the income.

    input: the year as a string. It is appended to the plot title and
        converted with int() to select the row of the table returned by
        transformIncome(). No validation is performed here.

    Draws a 20-bin histogram of that year's non-missing values on a new
    figure and displays it with plt.show(); nothing is returned.
    '''
    income_table = transformIncome()
    heading = 'Distribution of income per capita in ' + input

    # Entries with no value for this year are left out of the histogram.
    observed = income_table.loc[int(input)].dropna().values

    plt.figure()
    plt.hist(observed, bins=20)
    plt.title(heading)
    plt.ylabel('Frequency')
    plt.xlabel('Income per capita')
    plt.show()