Exemple #1
0
    def predict(self, R, Thetas=None, _type='cumulative_hazards', **kwargs):
        """
        Predict from the fitted regression for the given test data.

        Assuming that the type to refit is the first type of
         predictive_relationship

        Parameters
        ----------
        R :
            Test data; passed unchanged to ``self._modify_test_data``.
        Thetas : dict, optional
            Passed to ``self._modify_test_data``. A new empty dict is used
            when omitted.
        _type : str
            Kind of prediction to return. One of ``'cumulative_hazards'``,
            ``'survival_function'``, ``'percentile'``, ``'median'`` or
            ``'expectation'``.
        **kwargs :
            ``id_col`` (default ``None``) is forwarded for
            ``'cumulative_hazards'``; ``p`` (default ``0``) is forwarded for
            ``'percentile'``. Other keys are ignored.

        Returns
        -------
        Whatever the matching ``AalenAdditiveFitter.predict_*`` method
        returns for the modified test data.

        Raises
        ------
        Exception
            If ``self.regression_`` is falsy.
        ValueError
            If ``_type`` is not one of the supported values.
        """

        if not self.regression_:
            raise Exception("No regression was fitted on the traning")

        # Build the dict per call: a ``dict()`` default is evaluated once at
        # definition time and would be shared (and possibly mutated) across
        # every call that omits ``Thetas``.
        if Thetas is None:
            Thetas = {}

        X = self._modify_test_data(R, Thetas)
        if _type == 'cumulative_hazards':
            return AalenAdditiveFitter.predict_cumulative_hazard(
                self, X, id_col=kwargs.get('id_col', None))
        elif _type == 'survival_function':
            return AalenAdditiveFitter.predict_survival_function(self, X)
        elif _type == 'percentile':
            # NOTE(review): the fallback percentile is 0 when the caller
            # gives no ``p`` -- confirm this is intended rather than a
            # mid-range value.
            return AalenAdditiveFitter.predict_percentile(
                self, X, kwargs.get('p', 0))
        elif _type == 'median':
            return AalenAdditiveFitter.predict_median(self, X)
        elif _type == 'expectation':
            return AalenAdditiveFitter.predict_expectation(self, X)
        else:
            raise ValueError("Not avaialble type of prediction")
# We have learned to predict individual hazard rates, survival functions, and median survival time. The dataset we are using is
# limited to 2017, so let's use this data to predict the (though already partly seen) possible duration of couples who get married in 2017
# in the 4 different states which we are studying.
# First we select 4 random couples married in 2017, each of which is from a different state.
# We are subsetting the dataframe 'data' to a smaller dataframe of 4 couples only and comparing our prediction outputs.
# Covariate values for one couple; the Maryland state indicator is the one
# set to 1.0. Every list holds a single value, so the frame has one row.
row = {
    'State[Alabama]': [0.0],
    'State[Maryland]': [1.0],
    'State[Mississippi]': [0.0],
    'State[New Hampshire]': [0.0],
    'Couple_Race[T.Same-Race]': [1.0],
    'Household_Income_Range[T.42,830$ - 44,765$]': [0.0],
    'Household_Income_Range[T.66,532$ - 70,303$]': [0.0],
    'Household_Income_Range[T.67,500$ - 75,000$]': [1.0],
    'Husband_Education[T.16+ years]': [1.0],
    'Husband_Education[T.Less than 12 years]': [0.0],
    'Husband_Race[T.Other Ethnic Groups]': [1.0],
    'Marriage_Date': [2016],
    'T': 1,
    'E': [0],
}
MD = pd.DataFrame(row)
print("MD couple's unique data point", MD)

## Plot the predicted curves for this specific couple: cumulative hazard in
## the top panel, survival function in the bottom panel.
ax = plt.subplot(2, 1, 1)
aaf.predict_cumulative_hazard(MD).plot(ax=ax, legend=False)
# MD is the Maryland couple (State[Maryland] == 1.0) and the figure is saved
# as MarylandCouple.pdf, so the title names Maryland, not Mississippi.
plt.title('Maryland Couple predicted Hazard and Survival time')
ax = plt.subplot(2, 1, 2)
aaf.predict_survival_function(MD).plot(ax=ax, legend=False)
plt.savefig('/home/raed/Dropbox/INSE - 6320/Final Project/MarylandCouple.pdf')
plt.show()

# Same idea for the Alabama couple: the intent is to keep the education level
# and ethnicity the same so that the comparison stays valid.
# NOTE(review): 'Husband_Education[T.16+ years]' is 0.0 here but 1.0 in the
# Maryland row above -- confirm which value is intended.
row = {
    'State[Alabama]': [1.0],
    'State[Maryland]': [0.0],
    'State[Mississippi]': [0.0],
    'State[New Hampshire]': [0.0],
    'Couple_Race[T.Same-Race]': [1.0],
    'Household_Income_Range[T.42,830$ - 44,765$]': [1.0],
    'Household_Income_Range[T.66,532$ - 70,303$]': [0.0],
    'Household_Income_Range[T.67,500$ - 75,000$]': [0.0],
    'Husband_Education[T.16+ years]': [0.0],
    'Husband_Education[T.Less than 12 years]': [0.0],
    'Husband_Race[T.Other Ethnic Groups]': [1.0],
    'Marriage_Date': [2016],
    'T': 1,
    'E': [0],
}
AL = pd.DataFrame(row)
print("AL couple's unique data point", AL)