def test_covariates_from_event_matrix_with_simple_addition(self):
    """Check the cumulative-sum timeline built from a single event column.

    The expected frame has two rows for each of subjects 1-3 (split at the
    subject's "poison" time) and a single row for subject 4, whose
    "poison" entry is None.
    """
    timeline = pd.DataFrame(
        [[1, 0, 5, 1], [2, 0, 4, 1], [3, 0, 8, 1], [4, 0, 4, 1]],
        columns=["id", "start", "stop", "e"],
    )
    events = pd.DataFrame(
        [[1, 1], [2, 2], [3, 3], [4, None]],
        columns=["id", "poison"],
    )

    covariates = utils.covariates_from_event_matrix(events, "id")
    result = utils.add_covariate_to_timeline(
        timeline, covariates, "id", "duration", "e", cumulative_sum=True
    )

    # The resulting long-format frame must not contain any missing values.
    assert pd.notnull(result).all().all()

    expected_columns = ["start", "cumsum_poison", "stop", "id", "e"]
    expected_rows = [
        (0.0, 0.0, 1.0, 1, False),
        (1.0, 1.0, 5.0, 1, True),
        (0.0, 0.0, 2.0, 2, False),
        (2.0, 1.0, 4.0, 2, True),
        (0.0, 0.0, 3.0, 3, False),
        (3.0, 1.0, 8.0, 3, True),
        (0.0, 0.0, 4.0, 4, True),
    ]
    expected = pd.DataFrame(expected_rows, columns=expected_columns)

    # check_like=True makes the comparison ignore row/column ordering.
    assert_frame_equal(expected, result, check_dtype=False, check_like=True)
def test_covariates_from_event_matrix(self):
    """Compare the full cumulative-sum timeline for three event columns.

    Subject 4 appears in the base frame but not in the event matrix; its
    expected cumsum_* entries are None.
    """
    timeline = pd.DataFrame(
        [[1, 0, 5, 1], [2, 0, 4, 1], [3, 0, 8, 1], [4, 0, 4, 1]],
        columns=['id', 'start', 'stop', 'e'],
    )
    events = pd.DataFrame(
        [[1, 1, None, 2], [2, None, 5, None], [3, 3, 3, 7]],
        columns=['id', 'promotion', 'movement', 'raise'],
    )

    covariates = utils.covariates_from_event_matrix(events, 'id')
    result = utils.add_covariate_to_timeline(
        timeline, covariates, 'id', 'duration', 'e', cumulative_sum=True
    )

    # One tuple per expected row, in the field order given by `fields`.
    fields = ('cumsum_movement', 'cumsum_promotion', 'cumsum_raise',
              'e', 'id', 'start', 'stop')
    rows = [
        (0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0),
        (0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0),
        (0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0),
        (0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0),
        (0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0),
        (1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 7.0),
        (1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 8.0),
        (None, None, None, 1.0, 4.0, 0.0, 4.0),
    ]
    expected = pd.DataFrame.from_records(
        [dict(zip(fields, row)) for row in rows]
    )

    # check_like=True makes the comparison ignore row/column ordering.
    assert_frame_equal(expected, result, check_dtype=False, check_like=True)
def test_covariates_from_event_matrix(self):
    """Spot-check the cumulative-sum timeline rows produced for subject 1."""
    event_matrix = pd.DataFrame(
        [[1, 1, None, 2], [2, None, 5, None], [3, 3, 3, 7]],
        columns=['id', 'promotion', 'movement', 'raise'],
    )
    timeline = pd.DataFrame(
        [[1, 0, 5, 1], [2, 0, 4, 1], [3, 0, 8, 1], [4, 0, 4, 1]],
        columns=['id', 'start', 'stop', 'e'],
    )

    covariates = utils.covariates_from_event_matrix(event_matrix, 'id')
    timeline = utils.add_covariate_to_timeline(
        timeline, covariates, 'id', 'duration', 'e', cumulative_sum=True
    )

    # Only the rows belonging to subject 1 are inspected.
    subject_one = timeline.loc[timeline['id'] == 1]
    assert subject_one['cumsum_movement'].tolist() == [0, 0, 0]
    assert subject_one['cumsum_promotion'].tolist() == [0, 1, 1]
    assert subject_one['cumsum_raise'].tolist() == [0, 0, 1]
    assert subject_one['start'].tolist() == [0, 1., 2.]
def test_covariates_from_event_matrix(self):
    """Compare the full cumulative-sum timeline for three event columns.

    Subject 4 appears in the base frame but not in the event matrix; its
    expected cumsum_* entries are None.
    """

    def record(movement, promotion, raise_, e, id_, start, stop):
        # Build one expected row; key order mirrors the expected frame's columns.
        return {
            "cumsum_movement": movement,
            "cumsum_promotion": promotion,
            "cumsum_raise": raise_,
            "e": e,
            "id": id_,
            "start": start,
            "stop": stop,
        }

    timeline = pd.DataFrame(
        [[1, 0, 5, 1], [2, 0, 4, 1], [3, 0, 8, 1], [4, 0, 4, 1]],
        columns=["id", "start", "stop", "e"],
    )
    events = pd.DataFrame(
        [[1, 1, None, 2], [2, None, 5, None], [3, 3, 3, 7]],
        columns=["id", "promotion", "movement", "raise"],
    )

    covariates = utils.covariates_from_event_matrix(events, "id")
    result = utils.add_covariate_to_timeline(
        timeline, covariates, "id", "duration", "e", cumulative_sum=True
    )

    expected = pd.DataFrame.from_records(
        [
            record(0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0),
            record(0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0),
            record(0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0),
            record(0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0),
            record(0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0),
            record(1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 7.0),
            record(1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 8.0),
            record(None, None, None, 1.0, 4.0, 0.0, 4.0),
        ]
    )

    # check_like=True makes the comparison ignore row/column ordering.
    assert_frame_equal(expected, result, check_dtype=False, check_like=True)
def test_error_is_raised_if_columns_are_missing_in_seed_df(self, seed_df, cv1):
    """Expect IndexError when the seed frame no longer has a "start" column."""
    # Remove the column in place so the call below sees a seed frame without it.
    del seed_df["start"]

    with pytest.raises(IndexError):
        utils.add_covariate_to_timeline(seed_df, cv1, "id", "t", "E")
def test_error_is_raised_if_columns_are_missing_in_seed_df(self, seed_df, cv1):
    """Adding a covariate to a seed frame without 'start' must raise IndexError."""
    # Drop the column from the fixture before exercising the function.
    del seed_df['start']

    with pytest.raises(IndexError):
        utils.add_covariate_to_timeline(seed_df, cv1, 'id', 't', 'E')
def test_error_is_raised_if_columns_are_missing_in_seed_df(
    self, seed_df, cv1
):
    """Verify that a seed frame lacking 'start' is rejected with IndexError."""
    # The fixture is mutated in place: 'start' is deleted before the call.
    del seed_df['start']

    with pytest.raises(IndexError):
        utils.add_covariate_to_timeline(seed_df, cv1, 'id', 't', 'E')
{"id": 4, "z": 0, "time": 5}, {"id": 7, "z": 1, "time": 5}, {"id": 8, "z": 0, "time": 5}, {"id": 5, "z": 0, "time": 5}, {"id": 9, "z": 1, "time": 5}, {"id": 10, "z": 1, "time": 5}, {"id": 4, "z": 0, "time": 6}, {"id": 7, "z": 1, "time": 6}, {"id": 8, "z": 0, "time": 6}, {"id": 5, "z": 1, "time": 6}, {"id": 9, "z": 1, "time": 6}, {"id": 10, "z": 1, "time": 6}, {"id": 7, "z": 1, "time": 7}, {"id": 8, "z": 0, "time": 7}, {"id": 5, "z": 1, "time": 7}, {"id": 9, "z": 1, "time": 7}, {"id": 10, "z": 1, "time": 7}, {"id": 8, "z": 0, "time": 8}, {"id": 5, "z": 1, "time": 8}, {"id": 9, "z": 1, "time": 8}, {"id": 10, "z": 1, "time": 8}, {"id": 9, "z": 1, "time": 9}, {"id": 10, "z": 1, "time": 9}, ]) dfcv = add_covariate_to_timeline(df, cv, "id", "time", "event", add_enum=False)
"time": 8 }, { "id": 5, "z": 1, "time": 8 }, { "id": 9, "z": 1, "time": 8 }, { "id": 10, "z": 1, "time": 8 }, { "id": 9, "z": 1, "time": 9 }, { "id": 10, "z": 1, "time": 9 }, ]) dfcv = add_covariate_to_timeline(df, cv, "id", "time", "event", add_enum=False)
# Prepare DataFrame for lifelines analysis
#------------------------------------------------------------------------------
lf = to_long_format(df, 'age_at_exit')

# Piped alternative (possibly faster?), kept disabled for reference:
# lf = df.pipe(to_long_format, 'age_at_exit')\
#        .pipe(add_covariate_to_timeline(fund_cv_amt,
#                                        'id', 'time_to_funding', 'success',
#                                        cumulative_sum=False)\

# Add raised_amount_usd as a time-varying covariate.
# WARNING: the original author flagged a line here as extremely slow;
# presumably the add_covariate_to_timeline call below (TODO confirm by profiling).
fund_cv_amt = fund_cv[['id', 'raised_amount_usd', 'time_to_funding']]
lf = add_covariate_to_timeline(
    lf, fund_cv_amt,
    'id', 'time_to_funding', 'success',
    cumulative_sum=False,
)

# Add cumulative funding as covariate (currently disabled):
# lf = add_covariate_to_timeline(lf, fund_cv_amt,
#                                'id', 'time_to_funding', 'success',
#                                cumulative_sum=True)

# Add funding round type as covariate

# Write the result to a pickle file (currently disabled):
# lf.to_pickle('../data/survival_input.pkl')

#==============================================================================
#==============================================================================