Exemple #1
0
    def test_covariates_from_event_matrix_with_simple_addition(self):
        """Check the cumulative-sum timeline built from one event column.

        Subject 4 has no recorded ``poison`` time (``None``); the resulting
        timeline must nevertheless contain no null cells and must match the
        hand-written expectation below.
        """
        subject_rows = [[1, 0, 5, 1], [2, 0, 4, 1], [3, 0, 8, 1], [4, 0, 4, 1]]
        subjects = pd.DataFrame(subject_rows, columns=["id", "start", "stop", "e"])

        poison_rows = [[1, 1], [2, 2], [3, 3], [4, None]]
        poison_events = pd.DataFrame(poison_rows, columns=["id", "poison"])

        covariates = utils.covariates_from_event_matrix(poison_events, "id")
        timeline = utils.add_covariate_to_timeline(
            subjects, covariates, "id", "duration", "e", cumulative_sum=True
        )

        # Every cell of the result has to be populated.
        assert timeline.notnull().all().all()

        expected_columns = ["start", "cumsum_poison", "stop", "id", "e"]
        expected_rows = [
            (0.0, 0.0, 1.0, 1, False),
            (1.0, 1.0, 5.0, 1, True),
            (0.0, 0.0, 2.0, 2, False),
            (2.0, 1.0, 4.0, 2, True),
            (0.0, 0.0, 3.0, 3, False),
            (3.0, 1.0, 8.0, 3, True),
            (0.0, 0.0, 4.0, 4, True),
        ]
        expected = pd.DataFrame(expected_rows, columns=expected_columns)

        # dtypes are not compared, and index/column ordering is ignored.
        assert_frame_equal(expected, timeline, check_dtype=False, check_like=True)
Exemple #2
0
    def test_covariates_from_event_matrix(self):
        """Compare the timeline built from a three-column event matrix.

        The event matrix has no row for subject 4, and the expectation for
        that subject carries ``None`` in every ``cumsum_*`` column.
        """
        subjects = pd.DataFrame(
            [[1, 0, 5, 1], [2, 0, 4, 1], [3, 0, 8, 1], [4, 0, 4, 1]],
            columns=['id', 'start', 'stop', 'e'],
        )
        events = pd.DataFrame(
            [[1, 1, None, 2], [2, None, 5, None], [3, 3, 3, 7]],
            columns=['id', 'promotion', 'movement', 'raise'],
        )

        covariates = utils.covariates_from_event_matrix(events, 'id')
        timeline = utils.add_covariate_to_timeline(
            subjects, covariates, 'id', 'duration', 'e', cumulative_sum=True
        )

        # One tuple per expected record, laid out in the order of `fields`.
        fields = ('cumsum_movement', 'cumsum_promotion', 'cumsum_raise',
                  'e', 'id', 'start', 'stop')
        expected_rows = [
            (0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0),
            (0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0),
            (0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0),
            (0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0),
            (0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0),
            (1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 7.0),
            (1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 8.0),
            (None, None, None, 1.0, 4.0, 0.0, 4.0),
        ]
        expected = pd.DataFrame.from_records(
            [dict(zip(fields, row)) for row in expected_rows]
        )

        # dtypes are not compared, and index/column ordering is ignored.
        assert_frame_equal(expected, timeline, check_dtype=False, check_like=True)
Exemple #3
0
    def test_covariates_from_event_matrix(self):
        """Spot-check the rows produced for subject ``id == 1``.

        Only the cumulative-sum columns and the ``start`` column of that one
        subject are inspected.
        """
        events = pd.DataFrame(
            [[1, 1, None, 2], [2, None, 5, None], [3, 3, 3, 7]],
            columns=['id', 'promotion', 'movement', 'raise'],
        )
        subjects = pd.DataFrame(
            [[1, 0, 5, 1], [2, 0, 4, 1], [3, 0, 8, 1], [4, 0, 4, 1]],
            columns=['id', 'start', 'stop', 'e'],
        )

        covariates = utils.covariates_from_event_matrix(events, 'id')
        timeline = utils.add_covariate_to_timeline(
            subjects, covariates, 'id', 'duration', 'e', cumulative_sum=True
        )

        # Select subject 1 once and reuse the slice for every column check.
        subject_one = timeline.loc[timeline['id'] == 1]
        expected_by_column = (
            ('cumsum_movement', [0, 0, 0]),
            ('cumsum_promotion', [0, 1, 1]),
            ('cumsum_raise', [0, 0, 1]),
            ('start', [0, 1., 2.]),
        )
        for column, values in expected_by_column:
            assert subject_one[column].tolist() == values
Exemple #4
0
    def test_covariates_from_event_matrix(self):
        """Compare the timeline built from a three-column event matrix.

        The event matrix has no row for subject 4, and the expectation for
        that subject carries ``None`` in every ``cumsum_*`` column.
        """

        def record(movement, promotion, raise_, e, id_, start, stop):
            # Build one expected row; key order mirrors the hand-written dicts.
            return {
                "cumsum_movement": movement,
                "cumsum_promotion": promotion,
                "cumsum_raise": raise_,
                "e": e,
                "id": id_,
                "start": start,
                "stop": stop,
            }

        subject_rows = [[1, 0, 5, 1], [2, 0, 4, 1], [3, 0, 8, 1], [4, 0, 4, 1]]
        subjects = pd.DataFrame(subject_rows, columns=["id", "start", "stop", "e"])

        event_rows = [[1, 1, None, 2], [2, None, 5, None], [3, 3, 3, 7]]
        events = pd.DataFrame(event_rows, columns=["id", "promotion", "movement", "raise"])

        covariates = utils.covariates_from_event_matrix(events, "id")
        timeline = utils.add_covariate_to_timeline(
            subjects, covariates, "id", "duration", "e", cumulative_sum=True
        )

        expected = pd.DataFrame.from_records(
            [
                # subject 1
                record(0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0),
                record(0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0),
                record(0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0),
                # subject 2
                record(0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 4.0),
                # subject 3
                record(0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0),
                record(1.0, 1.0, 0.0, 0.0, 3.0, 3.0, 7.0),
                record(1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 8.0),
                # subject 4 (absent from the event matrix)
                record(None, None, None, 1.0, 4.0, 0.0, 4.0),
            ]
        )

        # dtypes are not compared, and index/column ordering is ignored.
        assert_frame_equal(expected, timeline, check_dtype=False, check_like=True)
Exemple #5
0
 def test_error_is_raised_if_columns_are_missing_in_seed_df(self, seed_df, cv1):
     """Expect ``IndexError`` once ``seed_df`` has lost its "start" column."""
     # Remove the column in place; the popped values themselves are not needed.
     seed_df.pop("start")
     with pytest.raises(IndexError):
         utils.add_covariate_to_timeline(seed_df, cv1, "id", "t", "E")
Exemple #6
0
 def test_error_is_raised_if_columns_are_missing_in_seed_df(self, seed_df, cv1):
     """Expect ``IndexError`` once ``seed_df`` has lost its 'start' column."""
     del seed_df['start']
     # Positional arguments for the call under test, gathered in one place.
     call_args = (seed_df, cv1, 'id', 't', 'E')
     with pytest.raises(IndexError):
         utils.add_covariate_to_timeline(*call_args)
Exemple #7
0
 def test_error_is_raised_if_columns_are_missing_in_seed_df(self, seed_df, cv1):
     """Expect ``IndexError`` once ``seed_df`` has lost its 'start' column."""
     del seed_df['start']
     # Everything after the seed frame is passed through unchanged.
     remaining_args = (cv1, 'id', 't', 'E')
     with pytest.raises(IndexError):
         utils.add_covariate_to_timeline(seed_df, *remaining_args)
Exemple #8
0
    {"id": 4,  "z": 0, "time": 5},
    {"id": 7,  "z": 1, "time": 5},
    {"id": 8,  "z": 0, "time": 5},
    {"id": 5,  "z": 0, "time": 5},
    {"id": 9,  "z": 1, "time": 5},
    {"id": 10,  "z": 1, "time": 5},

    {"id": 4,  "z": 0, "time": 6},
    {"id": 7,  "z": 1, "time": 6},
    {"id": 8,  "z": 0, "time": 6},
    {"id": 5,  "z": 1, "time": 6},
    {"id": 9,  "z": 1, "time": 6},
    {"id": 10,  "z": 1, "time": 6},

    {"id": 7,  "z": 1, "time": 7},
    {"id": 8,  "z": 0, "time": 7},
    {"id": 5,  "z": 1, "time": 7},
    {"id": 9,  "z": 1, "time": 7},
    {"id": 10,  "z": 1, "time": 7},

    {"id": 8,  "z": 0, "time": 8},
    {"id": 5,  "z": 1, "time": 8},
    {"id": 9,  "z": 1, "time": 8},
    {"id": 10,  "z": 1, "time": 8},

    {"id": 9,  "z": 1, "time": 9},
    {"id": 10,  "z": 1, "time": 9},
])

# Combine `df` and `cv` into `dfcv` via add_covariate_to_timeline, passing
# "id", "time" and "event" positionally (presumably the id, duration and event
# column names — confirm against the function's signature). add_enum is
# explicitly turned off.
# NOTE(review): `df` and `cv` are defined outside this excerpt; verify that
# their columns match the names used here.
dfcv = add_covariate_to_timeline(df, cv, "id", "time", "event", add_enum=False)
Exemple #9
0
        "time": 8
    },
    {
        "id": 5,
        "z": 1,
        "time": 8
    },
    {
        "id": 9,
        "z": 1,
        "time": 8
    },
    {
        "id": 10,
        "z": 1,
        "time": 8
    },
    {
        "id": 9,
        "z": 1,
        "time": 9
    },
    {
        "id": 10,
        "z": 1,
        "time": 9
    },
])

# Combine `df` and `cv` into `dfcv` via add_covariate_to_timeline, passing
# "id", "time" and "event" positionally (presumably the id, duration and event
# column names — confirm against the function's signature). add_enum is
# explicitly turned off.
# NOTE(review): `df` and `cv` are defined outside this excerpt; verify that
# their columns match the names used here.
dfcv = add_covariate_to_timeline(df, cv, "id", "time", "event", add_enum=False)
# -----------------------------------------------------------------------------
# Prepare DataFrame for lifelines analysis
# -----------------------------------------------------------------------------
# Convert `df` with to_long_format, using 'age_at_exit' as its second argument
# (presumably the duration column — confirm against to_long_format's docs).
lf = to_long_format(df, 'age_at_exit')

# Alternative, piped spelling of this step and the covariate step below.
# DataFrame.pipe(func, *args) only changes the call syntax, so no speed-up
# should be expected from it. `fund_cv_amt` must be defined before this runs.
# lf = df.pipe(to_long_format, 'age_at_exit')\
#        .pipe(add_covariate_to_timeline, fund_cv_amt,
#              'id', 'time_to_funding', 'success',
#              cumulative_sum=False)

# WARNING: the add_covariate_to_timeline call below was observed by the
# original author to be very slow.
# Add raised_amount_usd as a time-varying covariate: keep only the id, the
# covariate value and its timestamp column from `fund_cv`.
fund_cv_amt = fund_cv[['id', 'raised_amount_usd', 'time_to_funding']]
lf = add_covariate_to_timeline(lf,
                               fund_cv_amt,
                               'id',
                               'time_to_funding',
                               'success',
                               cumulative_sum=False)

# Disabled: add cumulative funding as a covariate (same call with
# cumulative_sum=True).
# lf = add_covariate_to_timeline(lf, fund_cv_amt,
#                                'id', 'time_to_funding', 'success',
#                                cumulative_sum=True)
# TODO: add funding round type as a covariate (not implemented here).

# Disabled: persist the prepared frame to a pickle file.
# lf.to_pickle('../data/survival_input.pkl')

#==============================================================================
#==============================================================================