Example #1
0
 def test_merge_by_year_3(self):
     """Third merge_by_year check: year 1800, entry 27 of the merged result."""
     country_table = pd.read_csv("countries.csv", sep=",")
     income_raw = pd.read_excel(
         "indicator gapminder gdp_per_capita_ppp.xlsx",
         sheetname="Data",
         index_col=0,
     )
     # merge_by_year is given the transposed income table.
     income_transposed = income_raw.transpose()
     merged = data_process_functions.merge_by_year(
         1800, country_table, income_transposed
     )
     entry = 27
     self.assertEqual(merged["Country"][entry], "Mauritius")
     self.assertEqual(merged["Region"][entry], "AFRICA")
     # Income is truncated to an int before the comparison.
     self.assertEqual(int(merged["Income"][entry]), 799)
Example #2
0
 def test_merge_by_year_1(self):
     """First merge_by_year check: year 1800, entry 176 of the merged result."""
     country_table = pd.read_csv("./countries.csv", sep=",")
     workbook_path = "./indicator gapminder gdp_per_capita_ppp.xlsx"
     income_raw = pd.read_excel(workbook_path, sheetname="Data", index_col=0)
     income_transposed = income_raw.transpose()
     merged = data_process_functions.merge_by_year(
         1800, country_table, income_transposed
     )
     entry = 176
     country_value = merged["Country"][entry]
     region_value = merged["Region"][entry]
     # Income is truncated to an int before the comparison.
     income_value = int(merged["Income"][entry])
     self.assertEqual(country_value, "Venezuela")
     self.assertEqual(region_value, "SOUTH AMERICA")
     self.assertEqual(income_value, 442)
Example #3
0
 def test_merge_by_year_case4(self):
     """Case 4: merge for year 2012 and check entry 147 of the result."""
     # Load both inputs from the working directory.
     country_table = pd.read_csv('countries.csv', sep=',')
     income_transposed = pd.read_excel(
         'indicator gapminder gdp_per_capita_ppp.xlsx',
         sheetname='Data',
         index_col=0,
     ).transpose()
     # Merge the two tables for the requested year.
     merged = data_process_functions.merge_by_year(
         2012, country_table, income_transposed
     )
     entry = 147
     self.assertEqual(merged['Country'][entry], 'Saint Kitts and Nevis')
     self.assertEqual(merged['Region'][entry], 'NORTH AMERICA')
     # Income is truncated to an int before the comparison.
     self.assertEqual(int(merged['Income'][entry]), 12659)
Example #4
0
 def test_merge_by_year_case3(self):
     """Case 3: merge for year 1800 and check entry 176 of the result."""
     # Load both inputs from the working directory.
     country_table = pd.read_csv('countries.csv', sep=',')
     income_raw = pd.read_excel(
         'indicator gapminder gdp_per_capita_ppp.xlsx',
         sheetname='Data',
         index_col=0,
     )
     # Merge the country table with the transposed income table.
     merged = data_process_functions.merge_by_year(
         1800, country_table, income_raw.transpose()
     )
     entry = 176
     country_value = merged['Country'][entry]
     self.assertEqual(country_value, 'Venezuela')
     region_value = merged['Region'][entry]
     self.assertEqual(region_value, 'SOUTH AMERICA')
     # Income is truncated to an int before the comparison.
     income_value = int(merged['Income'][entry])
     self.assertEqual(income_value, 442)
Example #5
0
 def test_merge_by_year_case1(self):
     """Case 1: merge for year 2003 and check entry 27 of the result."""
     # Load both inputs from the working directory.
     workbook_path = 'indicator gapminder gdp_per_capita_ppp.xlsx'
     country_table = pd.read_csv('countries.csv', sep=',')
     income_raw = pd.read_excel(workbook_path, sheetname='Data', index_col=0)
     income_transposed = income_raw.transpose()
     # Merge the two tables for the requested year.
     merged = data_process_functions.merge_by_year(
         2003, country_table, income_transposed
     )
     entry = 27
     self.assertEqual(merged['Country'][entry], 'Mauritius')
     self.assertEqual(merged['Region'][entry], 'AFRICA')
     # Income is truncated to an int before the comparison.
     self.assertEqual(int(merged['Income'][entry]), 9564)
Example #6
0
def generate_answers_hw9():
    '''Run the assignment 9 workflow end to end.

    Steps, in order:
      1. Read ``countries.csv`` and the ``Data`` sheet of the gapminder
         workbook from the current working directory, transpose the income
         table and print its head.
      2. Repeatedly read a line from standard input. ``finish`` ends the
         loop; a four-digit year present in the transposed table's index is
         passed to ``data_process_functions.display_income_dist_by_year``;
         anything else prints a warning and prompts again.
      3. For every year 2007-2012, merge the tables for that year and call
         the box-plot and histogram methods of ``Data_Analysis_Tools``.

    Takes no arguments and returns None. ``Empty_Input_Error`` and
    ``Invalid_Input_Error`` are raised and handled inside the input loop.
    Every print passes one argument in parentheses, so the output on
    Python 2 is the same as with the print statement.
    '''
    # Q1
    countries = pd.read_csv('countries.csv', sep=',')
    # Q2: load the xlsx data from the sheet 'Data'
    income = pd.read_excel('indicator gapminder gdp_per_capita_ppp.xlsx',
                           sheetname='Data', index_col=0)
    # Q3: the transpose of the dataframe income
    income_new = income.transpose()
    print("The head of the data set:")
    print(income_new.head())
    print("The data has been loaded successfully!\n")

    # The user interaction part.
    while True:
        try:
            print("Please Enter a year from 1800 to 2012 to check the income distribution (for example: 2003), enter 'finish' to stop checking")
            print("(Hint:Close the plot window to continue)")
            year_input = raw_input()
            if year_input == "finish":  # quit the interactive part
                print("the check process is finished")
                break
            if year_input == "":
                raise Empty_Input_Error
            # Accept only four digits with a non-zero leading digit.
            if not re.match(r'^[1-9][0-9][0-9][0-9]$', year_input):
                raise Invalid_Input_Error

            year_number = int(year_input)
            # A well-formed year must also be present in the loaded data.
            if year_number not in income_new.index:
                raise Invalid_Input_Error
            # Q4: display the distribution of income per person across all
            # countries for the given year.
            data_process_functions.display_income_dist_by_year(year_number, income_new)

        except Empty_Input_Error:
            # The prompt asks for a year, so the warning says "year" too.
            print("Warning: The input is empty! please re-enter the year")

        except Invalid_Input_Error:
            print("Warning: The input number is not valid! please re-enter the year")

    # Generate the plots for 2007 through 2012 (range end is exclusive).
    print("the program is generating graphs for the years 2007-2012...")
    for year_ind in range(2007, 2013):
        merged_data = data_process_functions.merge_by_year(year_ind, countries, income_new)  # Q5
        data_analysis_instance = data_analysis_tools.Data_Analysis_Tools(year_ind, merged_data)  # Q6
        data_analysis_instance.plot_boxplots()  # Q8
        data_analysis_instance.plot_histograms()  # Q8

    print("Congratulations! the results are saved successfully, thanks for trying, bye")
Example #7
0
def generate_answers():
    '''Load the data, serve interactive year queries, then plot 2007-2012.

    Reads ``./countries.csv`` and the ``Data`` sheet of the gapminder
    workbook, transposes the income table and prints its head. It then reads
    lines from standard input until ``finish`` is entered: a four-digit year
    present in the transposed table's index is passed to
    ``data_process_functions.display_income_distribution``; empty or invalid
    input prints a warning and prompts again. Finally, for every year
    2007-2012 it merges the tables and calls the histogram and box-plot
    methods of ``data_analysis_tools.data_analysis``.

    Takes no arguments and returns None. Indentation uses spaces only,
    because the earlier tab/space mix is rejected by Python 3. Every print
    passes one argument in parentheses, so the output on Python 2 is the
    same as with the print statement.
    '''
    # question 1
    countries = pd.read_csv('./countries.csv', sep=',')
    # question 2
    income = pd.read_excel('./indicator gapminder gdp_per_capita_ppp.xlsx',
                           sheetname='Data', index_col=0)
    # question 3
    income2 = income.transpose()
    print("The head of the data set is:")
    print(income2.head())

    while True:
        try:
            print("Please Enter a year from 1800 to 2012 to check the income distribution (hint: enter 'finish' to stop displaying)")
            year_input = raw_input()
            if year_input == "finish":
                break
            if year_input == "":
                raise Empty_Input_Error
            # Accept only four digits with a non-zero leading digit.
            if not re.match(r'^[1-9][0-9][0-9][0-9]$', year_input):
                raise Invalid_Input_Error

            year_number = int(year_input)
            # A well-formed year must also be present in the loaded data.
            if year_number not in income2.index:
                raise Invalid_Input_Error
            # question 4: display the distribution of income per person
            # across all countries for the given year
            data_process_functions.display_income_distribution(year_number, income2)
        except Empty_Input_Error:
            # Tell the user why the prompt is repeating instead of silently looping.
            print("Warning: The input is empty! please re-enter the year")
        except Invalid_Input_Error:
            print("Warning: The input number is not valid! please re-enter the year")

    # question 8: generate graphs for the years of 2007-2012
    print("The graphs for 2007-2012 are generating...")
    for year_i in range(2007, 2013):
        merged_data = data_process_functions.merge_by_year(year_i, countries, income2)
        graph_data = data_analysis_tools.data_analysis(year_i, merged_data)
        graph_data.plot_histograms()
        graph_data.plot_boxplots()
    print("The graphs for 2007-2012 have been successfully generated. Please check the directory. Thanks!")