def lifeExpectancyTotal(sex, region, dob): if not isinstance(dob, date): raise TypeError('One or more arguments did not match the expected parameter type') if dob < date(1920, 1, 1) or dob > date(2059, 12, 31): raise BirthdateOutOfRangeError(dob, 'between 1920-01-01 and 2059-12-31') age = relativedelta(years=35) # set an arbitrary age that keeps our calculation in the boundaries of lifeExpectancyRemaining() age_float = relativedelta_to_decimal_years(age) refdate = dob + age return age_float + lifeExpectancyRemaining(sex, region, refdate, age)
def lifeExpectancyTotal(sex, region, dob): if not isinstance(dob, date): raise TypeError( 'One or more arguments did not match the expected parameter type') if dob < date(1920, 1, 1) or dob > date(2059, 12, 31): raise BirthdateOutOfRangeError(dob, 'between 1920-01-01 and 2059-12-31') age = relativedelta( years=35 ) # set an arbitrary age that keeps our calculation in the boundaries of lifeExpectancyRemaining() age_float = relativedelta_to_decimal_years(age) refdate = dob + age return age_float + lifeExpectancyRemaining(sex, region, refdate, age)
def calculateMortalityDistribution(country, sex, age): # check that all arguments have the right type (even though it's not very pythonic) if not isinstance(sex, basestring) or not isinstance( country, basestring) or not isinstance(age, relativedelta): raise TypeError( 'One or more arguments did not match the expected parameter type') # confirm that sex and region contain valid values if sex not in SEXES_LIFE_EXPECTANCY: raise InvalidSexError(sex) if country not in dataStore.countries: raise InvalidCountryError(country) age_float = relativedelta_to_decimal_years(age) if age_float > 120: raise AgeOutOfRangeError(age) # helper function def setInterpDate(x, offset): """ ??? """ idate = datetime.strptime( str(x + offset + 3) + "/1" + "/1", "%Y/%m/%d") return (idate - datetime(1970, 1, 1)).days def rounddown(x, base=5): return int(base * math.floor(float(x) / base)) # get columns which correspond to the inputs idate = datetime.utcnow().date() iage = age_float iyear = idate.year flr_yr = rounddown(iyear, base=5) # get closest age in 5 year windows flr_age = rounddown(iage, base=5) # Get the age cohort if flr_age >= 5: cohort_st = list( dataStore.survival_ratio.columns).index("X" + str(flr_age - 5)) else: cohort_st = 4 cohort_end = len(dataStore.survival_ratio.columns) #cohort = dataStore.survival_ratio.loc[(dataStore.survival_ratio.region==country) & (dataStore.survival_ratio.sex==SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.survival_ratio.Begin_prd >=(flr_yr-5))].ix[:,cohort_st:cohort_end] # get older and younger cohort cohort_old = dataStore.survival_ratio.loc[ (dataStore.survival_ratio.region == country) & (dataStore.survival_ratio.sex == SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.survival_ratio.Begin_prd >= (flr_yr - 10))].ix[:, cohort_st:cohort_end] #cohort_young = dataStore.survival_ratio.loc[(dataStore.survival_ratio.region==country) & (dataStore.survival_ratio.sex==SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.survival_ratio.Begin_prd >=(flr_yr))].ix[:,cohort_st:cohort_end] # get dates for the jan 1st for 3 years --> then to Unix timestamp dates = [ setInterpDate(flr_yr, -5), setInterpDate(flr_yr, 0), setInterpDate(flr_yr, +5) ] # make the output dataStore.survival_ratiotable temp = np.zeros(shape=(len(cohort_old.columns), 7)) odata = pd.DataFrame(temp, columns=[ "lower_age", "pr0", "pr1", "pr2", "pr_sx_date", "death_percent", "dth_pc_after_exact_age" ]) # fill in with existing values if iage >= 5: odata['lower_age'] = np.arange(flr_age - 5, 130, 5) else: odata['lower_age'] = np.arange(0, 130, 5) odata['pr0'] = np.matrix(cohort_old).diagonal().T odata['pr1'] = np.matrix(cohort_old).diagonal(-1).T odata['pr2'] = np.matrix(cohort_old).diagonal(-2).T # Interpolate for the input date (idate) odata["pr_sx_date"] = np.array([ InterpolatedUnivariateSpline(dates, list(odata.ix[i, 1:4]), k=2)(inPosixDays(idate)) for i in np.arange(0, len(cohort_old.columns), 1) ]) clen = len(odata) # calc the % deaths odata["death_percent"][1] = 100 for i in np.arange(2, clen, 1): odata["death_percent"][i] = odata["death_percent"][ i - 1] * odata["pr_sx_date"][i] # percentage deaths for i in np.arange(1, clen - 1, 1): odata["dth_pc_after_exact_age"][ i] = odata["death_percent"][i] - odata["death_percent"][i + 1] odata["dth_pc_after_exact_age"][clen - 1] = odata["death_percent"][clen - 1] # proportion of people who will die before iage beforeDod = odata["dth_pc_after_exact_age"][1] * (iage - flr_age) / 5 odata["dth_pc_after_exact_age"][ 1] = odata["dth_pc_after_exact_age"][1] - beforeDod odata["dth_pc_after_exact_age"] = odata[ "dth_pc_after_exact_age"] * 100 / odata["dth_pc_after_exact_age"].sum( ) # add 5 to each of the "ages" if iage >= 5: odata["lower_age"] = odata["lower_age"] + 5 else: odata["lower_age"] = odata["lower_age"] + iage output = odata.ix[0:clen - 1, ['lower_age', 'dth_pc_after_exact_age']] return list(output.values)
def lifeExpectancyRemaining(sex, region, refdate, age): # check that all arguments have the right type (even though it's not very pythonic) if not isinstance(sex, basestring) or not isinstance( region, basestring) or not isinstance( refdate, date) or not isinstance(age, relativedelta): raise TypeError( 'One or more arguments did not match the expected parameter type') # confirm that sex and region contain valid values if sex not in SEXES_LIFE_EXPECTANCY: raise InvalidSexError(sex) if region not in dataStore.countries: raise InvalidCountryError(region) # check the various date requirements if refdate < date(1955, 1, 1) or refdate >= date(2095, 1, 1): raise CalculationDateOutOfRangeError(refdate, 'from 1955-01-01 to 2094-12-31') age_float = relativedelta_to_decimal_years(age) if age_float > 120: raise AgeOutOfRangeError(age) if refdate - age > date(2095, 6, 30): raise EffectiveBirthdateOutOfRangeError(invalidValue=(refdate - age)) # find beginning of 5 yearly period for the le_date le_yr = refdate.year lowest_year = math.floor(int(le_yr) / 5) * 5 # extract a row corresponding to the time-period life_exp_prd_5below = dataStore.life_expectancy_ages[ (dataStore.life_expectancy_ages.region == region) & (dataStore.life_expectancy_ages.sex == SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.life_expectancy_ages.Begin_prd == lowest_year - 5)] life_exp_prd_ext = dataStore.life_expectancy_ages[ (dataStore.life_expectancy_ages.region == region) & (dataStore.life_expectancy_ages.sex == SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.life_expectancy_ages.Begin_prd == lowest_year)] life_exp_prd_5above = dataStore.life_expectancy_ages[ (dataStore.life_expectancy_ages.region == region) & (dataStore.life_expectancy_ages.sex == SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.life_expectancy_ages.Begin_prd == lowest_year + 5)] # life_exp_prd life_exp_prd = pd.concat( [life_exp_prd_5below, life_exp_prd_ext, life_exp_prd_5above]) life_exp_prd = life_exp_prd.ix[:, 4:len(life_exp_prd.columns)] # Place holder for Agenames and values for three consecutive periods of interest life_exp_ = np.zeros((len(life_exp_prd.columns), 4)) # Age group starting at and less than the next value: 0, 1, 5, 10 life_exp_[:, 0] = np.insert((np.arange(5, 130, 5)), 0, [0, 1]) # transpose the dataframe - prep for assinging life expectancy vals life_exp_prd = life_exp_prd.T # Assigning life expectancy values life_exp_[:, 1] = life_exp_prd[life_exp_prd.columns[0]].values life_exp_[:, 2] = life_exp_prd[life_exp_prd.columns[1]].values life_exp_[:, 3] = life_exp_prd[life_exp_prd.columns[2]].values # interpolations xx_interp1 = InterpolatedUnivariateSpline(life_exp_[:, 0], life_exp_[:, 1]) xx_interp2 = InterpolatedUnivariateSpline(life_exp_[:, 0], life_exp_[:, 2]) xx_interp3 = InterpolatedUnivariateSpline(life_exp_[:, 0], life_exp_[:, 3]) # predictions x_interp1 = xx_interp1( age_float) #interpolated value for AGE in earlier 5 yearly period x_interp2 = xx_interp2( age_float ) #interpolated value for AGE in the 5 yearly period of interest x_interp3 = xx_interp3( age_float) #interpolated value for AGE in 5 yearly period after # matrix of vals life_exp_yr = np.zeros((3, 2)) #The mid point of period 2010-2015 which is from 1st July 2010 to June 30 of 2015, therefore, the mid point is 1st Jan 2013 #In the following we turn the year to the date and then to numeric. We will use these to interpolate between periods and then predict the le for exact date addDate = lambda d: inPosixDays(date(int(d) + 3, 1, 1)) life_exp_yr[:, 0] = [ addDate(lowest_year - 5), addDate(lowest_year), addDate(lowest_year + 5) ] life_exp_yr[:, 1] = [x_interp1, x_interp2, x_interp3] life_exp_spl = InterpolatedUnivariateSpline(life_exp_yr[:, 0], life_exp_yr[:, 1], k=2) return life_exp_spl(inPosixDays(refdate))[()]
def lifeExpectancyRemaining(sex, region, refdate, age): # check that all arguments have the right type (even though it's not very pythonic) if ( not isinstance(sex, basestring) or not isinstance(region, basestring) or not isinstance(refdate, date) or not isinstance(age, relativedelta) ): raise TypeError("One or more arguments did not match the expected parameter type") # confirm that sex and region contain valid values if sex not in SEXES_LIFE_EXPECTANCY: raise InvalidSexError(sex) if region not in dataStore.countries: raise InvalidCountryError(region) # check the various date requirements if refdate < date(1955, 1, 1) or refdate >= date(2095, 1, 1): raise CalculationDateOutOfRangeError(refdate, "from 1955-01-01 to 2094-12-31") age_float = relativedelta_to_decimal_years(age) if age_float > 100: raise AgeOutOfRangeError(age) # find beginning of 5 yearly period for the le_date le_yr = refdate.year # (le_date.loc['1'])[0:4] lowest_year = math.floor(int(le_yr) / 5) * 5 # extract a row corresponding to the time-period life_exp_prd_5below = dataStore.life_expectancy_ages[ (dataStore.life_expectancy_ages.region == region) & (dataStore.life_expectancy_ages.sex == SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.life_expectancy_ages.Begin_prd == lowest_year - 5) ] life_exp_prd_ext = dataStore.life_expectancy_ages[ (dataStore.life_expectancy_ages.region == region) & (dataStore.life_expectancy_ages.sex == SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.life_expectancy_ages.Begin_prd == lowest_year) ] life_exp_prd_5above = dataStore.life_expectancy_ages[ (dataStore.life_expectancy_ages.region == region) & (dataStore.life_expectancy_ages.sex == SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.life_expectancy_ages.Begin_prd == lowest_year + 5) ] life_exp_prd = pd.concat([life_exp_prd_5below, life_exp_prd_ext, life_exp_prd_5above]) life_exp_prd = life_exp_prd.ix[:, 7 : len(life_exp_prd.columns)] # Place holder for Agenames and values for three consecutive periods of interest life_exp_ = np.zeros((len(life_exp_prd.columns), 4)) # Age group starting at and less than the next value: 0, 1, 5, 10 life_exp_[:, 0] = np.insert((np.arange(5, 105, 5)), 0, [0, 1]) # transpose the dataframe - prep for assinging life expectancy vals life_exp_prd = life_exp_prd.T # Assigning life expectancy values life_exp_[:, 1] = life_exp_prd[life_exp_prd.columns[0]].values life_exp_[:, 2] = life_exp_prd[life_exp_prd.columns[1]].values life_exp_[:, 3] = life_exp_prd[life_exp_prd.columns[2]].values # interpolations xx_interp1 = InterpolatedUnivariateSpline(life_exp_[:, 0], life_exp_[:, 1]) xx_interp2 = InterpolatedUnivariateSpline(life_exp_[:, 0], life_exp_[:, 2]) xx_interp3 = InterpolatedUnivariateSpline(life_exp_[:, 0], life_exp_[:, 3]) # predictions x_interp1 = xx_interp1(age_float) # interpolated value for AGE in earlier 5 yearly period x_interp2 = xx_interp2(age_float) # interpolated value for AGE in the 5 yearly period of interest x_interp3 = xx_interp3(age_float) # interpolated value for AGE in 5 yearly period after # matrix of vals life_exp_yr = np.zeros((3, 2)) # The mid point of period 2010-2015 which is from 1st July 2010 to June 30 of 2015, therefore, the mid point is 1st Jan 2013 # In the following we turn the year to the date and then to numeric. We will use these to interpolate between periods and then predict the le for exact date addDate = lambda d: inPosixDays(date(int(d) + 3, 1, 1)) life_exp_yr[:, 0] = [addDate(lowest_year - 5), addDate(lowest_year), addDate(lowest_year + 5)] life_exp_yr[:, 1] = [x_interp1, x_interp2, x_interp3] return life_exp_yr[0, 1]
def calculateMortalityDistribution(country, sex, age): # check that all arguments have the right type (even though it's not very pythonic) if not isinstance(sex, basestring) or not isinstance(country, basestring) or not isinstance(age, relativedelta): raise TypeError('One or more arguments did not match the expected parameter type') # confirm that sex and region contain valid values if sex not in SEXES_LIFE_EXPECTANCY: raise InvalidSexError(sex) if country not in dataStore.countries: raise InvalidCountryError(country) age_float = relativedelta_to_decimal_years(age) if age_float > 120: raise AgeOutOfRangeError(age) # helper function def setInterpDate(x, offset): """ ??? """ idate = datetime.strptime(str(x+offset+3)+"/1"+"/1", "%Y/%m/%d") return (idate - datetime(1970, 1, 1)).days def rounddown(x, base=5): return int(base * math.floor(float(x)/base)) # get columns which correspond to the inputs idate = datetime.utcnow().date() iage = age_float iyear = idate.year flr_yr = rounddown(iyear, base=5) # get closest age in 5 year windows flr_age = rounddown(iage, base=5) # Get the age cohort if flr_age >= 5: cohort_st = list(dataStore.survival_ratio.columns).index("X"+str(flr_age-5)) else: cohort_st = 4 cohort_end = len(dataStore.survival_ratio.columns) #cohort = dataStore.survival_ratio.loc[(dataStore.survival_ratio.region==country) & (dataStore.survival_ratio.sex==SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.survival_ratio.Begin_prd >=(flr_yr-5))].ix[:,cohort_st:cohort_end] # get older and younger cohort cohort_old = dataStore.survival_ratio.loc[(dataStore.survival_ratio.region==country) & (dataStore.survival_ratio.sex==SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.survival_ratio.Begin_prd >=(flr_yr-10))].ix[:,cohort_st:cohort_end] #cohort_young = dataStore.survival_ratio.loc[(dataStore.survival_ratio.region==country) & (dataStore.survival_ratio.sex==SEXES_LIFE_EXPECTANCY[sex]) & (dataStore.survival_ratio.Begin_prd >=(flr_yr))].ix[:,cohort_st:cohort_end] # get dates for the jan 1st for 3 years --> then to Unix timestamp dates = [setInterpDate(flr_yr, -5), setInterpDate(flr_yr, 0), setInterpDate(flr_yr, +5)] # make the output dataStore.survival_ratiotable temp = np.zeros(shape=(len(cohort_old.columns),7)) odata = pd.DataFrame(temp, columns=["lower_age","pr0","pr1","pr2","pr_sx_date","death_percent","dth_pc_after_exact_age"]) # fill in with existing values if iage>=5: odata['lower_age'] = np.arange(flr_age-5, 130, 5) else: odata['lower_age'] = np.arange(0, 130, 5) odata['pr0'] = np.matrix(cohort_old).diagonal().T odata['pr1'] = np.matrix(cohort_old).diagonal(-1).T odata['pr2'] = np.matrix(cohort_old).diagonal(-2).T # Interpolate for the input date (idate) odata["pr_sx_date"] = np.array([InterpolatedUnivariateSpline(dates,list(odata.ix[i,1:4]),k=2)(inPosixDays(idate)) for i in np.arange(0, len(cohort_old.columns),1)]) clen = len(odata) # calc the % deaths odata["death_percent"][1] = 100 for i in np.arange(2,clen,1): odata["death_percent"][i] = odata["death_percent"][i-1]*odata["pr_sx_date"][i] # percentage deaths for i in np.arange(1,clen-1,1): odata["dth_pc_after_exact_age"][i] = odata["death_percent"][i] - odata["death_percent"][i+1] odata["dth_pc_after_exact_age"][clen-1] = odata["death_percent"][clen-1] # proportion of people who will die before iage beforeDod = odata["dth_pc_after_exact_age"][1] * (iage - flr_age)/5 odata["dth_pc_after_exact_age"][1] = odata["dth_pc_after_exact_age"][1] - beforeDod odata["dth_pc_after_exact_age"] = odata["dth_pc_after_exact_age"]* 100/odata["dth_pc_after_exact_age"].sum() # add 5 to each of the "ages" if iage>=5: odata["lower_age"] = odata["lower_age"]+5 else: odata["lower_age"] = odata["lower_age"]+iage output = odata.ix[0:clen-1,['lower_age', 'dth_pc_after_exact_age']] return list(output.values)