Esempio n. 1
0
    def UpdateData(self):
        """Refresh the cached event/duration series and the feature matrix.

        Reads ``self._df`` (and ``self.df``) and writes ``self.event``,
        ``self.duration``, ``self._array``, ``self._case_name`` and
        ``self._feature_name``.

        Raises:
            ValueError: if the event column does not hold exactly two
                distinct values.
        """
        # NOTE(review): the return value is discarded here. If this is
        # lifelines' ``to_long_format`` (which returns a new frame and takes
        # the *duration* column as its second argument), this call has no
        # lasting effect -- confirm the intent before changing it.
        to_long_format(self._df, self.event_name)

        self.event = self._df[self.event_name]
        self.duration = self._df[self.duration_name]

        if np.unique(self.event.to_numpy()).size != 2:
            # Build the message once so the log entry and the exception agree.
            message = 'Key \'{}\' must have only two values'.format(
                self.event_name)
            mylog.error(message)
            raise ValueError(message)

        # NOTE(review): ``self.df`` is read here while ``self._df`` is read
        # above; presumably ``df`` is a property over ``_df`` -- verify.
        new_df = self.df.drop(columns=[self.event_name, self.duration_name],
                              inplace=False)
        # Everything except the event and duration columns is kept as features.
        self._array = new_df.values
        self._case_name = list(new_df.index)
        self._feature_name = list(new_df.columns)
Esempio n. 2
0
 def seed_df(self):
     """Return the two-record fixture frame after ``utils.to_long_format``.

     The frame is built from two records with keys ``id``, ``var1``, ``T``
     and ``E``; ``"T"`` is passed as the second argument of
     ``utils.to_long_format``.
     """
     records = [
         {"id": 1, "var1": 0.1, "T": 10, "E": 1},
         {"id": 2, "var1": 0.5, "T": 12, "E": 0},
     ]
     wide = pd.DataFrame.from_records(records)
     return utils.to_long_format(wide, "T")
Esempio n. 3
0
 def seed_df(self):
     """Build the two-row seed frame and pass it through ``utils.to_long_format``.

     Each record carries the keys ``id``, ``var1``, ``T`` and ``E``; the
     column name ``'T'`` is handed to ``utils.to_long_format``.
     """
     first = {'id': 1, 'var1': 0.1, 'T': 10, 'E': 1}
     second = {'id': 2, 'var1': 0.5, 'T': 12, 'E': 0}
     frame = pd.DataFrame.from_records([first, second])
     long_frame = utils.to_long_format(frame, 'T')
     return long_frame
Esempio n. 4
0
 def seed_df(self):
     """Return the seed frame (two records) converted by ``utils.to_long_format``.

     Records are assembled from a shared key tuple and one value tuple per
     row, then ``'T'`` is passed as the second argument of
     ``utils.to_long_format``.
     """
     keys = ('id', 'var1', 'T', 'E')
     rows = (
         (1, 0.1, 10, 1),
         (2, 0.5, 12, 0),
     )
     # dict(zip(...)) preserves key order, so the column order is unchanged.
     records = [dict(zip(keys, row)) for row in rows]
     return utils.to_long_format(pd.DataFrame.from_records(records), 'T')
Esempio n. 5
0
# Ten-row fixture with columns id / time / event / group.
df = pd.DataFrame(
    data=[
        [1, 3, True, 1],
        [6, 4, False, 0],
        [3, 5, True, 1],
        [2, 5, False, 1],
        [4, 6, True, 1],
        [7, 7, True, 0],
        [8, 8, False, 0],
        [5, 8, False, 1],
        [9, 9, True, 0],
        [10, 10, True, 0],
    ],
    columns=['id', 'time', 'event', 'group'],
)

# Equivalent to to_long_format(df, 'time'): pipe passes the frame first.
df = df.pipe(to_long_format, 'time')

cv = pd.DataFrame.from_records([
    {"id": 1,  "z": 0, "time": 0},
    {"id": 6,  "z": 1, "time": 0},
    {"id": 3,  "z": 1, "time": 0},
    {"id": 2,  "z": 0, "time": 0},
    {"id": 4,  "z": 0, "time": 0},
    {"id": 7,  "z": 0, "time": 0},
    {"id": 8,  "z": 0, "time": 0},
    {"id": 5,  "z": 0, "time": 0},
    {"id": 9,  "z": 0, "time": 0},
    {"id": 10,  "z": 0, "time": 0},

    {"id": 1,  "z": 0, "time": 3},
    {"id": 6,  "z": 1, "time": 3},
Esempio n. 6
0
# Fixture rows, one list per subject, in the column order given below.
df = pd.DataFrame(
    columns=['id', 'time', 'event', 'group'],
    data=[
        [1, 3, True, 1],
        [6, 4, False, 0],
        [3, 5, True, 1],
        [2, 5, False, 1],
        [4, 6, True, 1],
        [7, 7, True, 0],
        [8, 8, False, 0],
        [5, 8, False, 1],
        [9, 9, True, 0],
        [10, 10, True, 0],
    ],
)

# pipe(f, arg) calls f(df, arg), i.e. to_long_format(df, 'time').
df = df.pipe(to_long_format, 'time')

cv = pd.DataFrame.from_records([
    {
        "id": 1,
        "z": 0,
        "time": 0
    },
    {
        "id": 6,
        "z": 1,
        "time": 0
    },
    {
        "id": 3,
        "z": 1,
Esempio n. 7
0
        [1, 3, True, 1],
        [6, 4, False, 0],
        [3, 5, True, 1],
        [2, 5, False, 1],
        [4, 6, True, 1],
        [7, 7, True, 0],
        [8, 8, False, 0],
        [5, 8, False, 1],
        [9, 9, True, 0],
        [10, 10, True, 0],
    ],
    columns=["id", "time", "event", "group"],
)


# Rebind df to the result of to_long_format, passing "time" as the column name.
# NOTE(review): presumably this is lifelines.utils.to_long_format -- confirm.
df = to_long_format(df, "time")

cv = pd.DataFrame.from_records(
    [
        {"id": 1, "z": 0, "time": 0},
        {"id": 6, "z": 1, "time": 0},
        {"id": 3, "z": 1, "time": 0},
        {"id": 2, "z": 0, "time": 0},
        {"id": 4, "z": 0, "time": 0},
        {"id": 7, "z": 0, "time": 0},
        {"id": 8, "z": 0, "time": 0},
        {"id": 5, "z": 0, "time": 0},
        {"id": 9, "z": 0, "time": 0},
        {"id": 10, "z": 0, "time": 0},
        {"id": 1, "z": 0, "time": 3},
        {"id": 6, "z": 1, "time": 3},
# -*- coding: utf-8 -*-
if __name__ == "__main__":
    # Benchmark script: times CoxTimeVaryingFitter.fit on the Rossi dataset
    # replicated 20 times, then prints the elapsed time and the fit summary.
    import time
    import pandas as pd
    from lifelines import CoxTimeVaryingFitter
    from lifelines.datasets import load_rossi
    from lifelines.utils import to_long_format

    df = load_rossi()
    df = pd.concat([df] * 20)
    # reset_index() turns the old index into an "index" column, which is the
    # column passed as id_col to fit() below.
    df = df.reset_index()
    df = to_long_format(df, duration_col="week")
    ctv = CoxTimeVaryingFitter()
    # perf_counter() is monotonic and high-resolution; time.time() is a wall
    # clock that can jump, which makes it unreliable for measuring durations.
    start_time = time.perf_counter()
    ctv.fit(df,
            id_col="index",
            event_col="arrest",
            start_col="start",
            stop_col="stop")
    time_took = time.perf_counter() - start_time
    print("--- %s seconds ---" % time_took)
    ctv.print_summary()
# Merge fund_cv DataFrame (disabled; kept for reference)
# df = df.merge(fund_cv.drop(columns='name'), on='id', how='inner')
# Reduce to just companies for which we have relevant information:
# keep only the rows of df whose id also appears in fund_cv.
df = df[df.id.isin(fund_cv.id.unique())]

# SMALL SUBSET TEST CODE:
# dr = df[(df.name == 'Twitter') | (df.name == 'Facebook')]
# lr = to_long_format(dr, duration_col='age_at_exit')
# rr = fund_cv.loc[(fund_cv.name == 'Twitter') | (fund_cv.name == 'Facebook'),
#             ['id', 'raised_amount_usd', 'time_to_funding']]
# lr = add_covariate_to_timeline(lr, rr, 'id', 'time_to_funding', 'success', cumulative_sum=True)

#------------------------------------------------------------------------------
#        Prepare DataFrame for lifelines analysis
#------------------------------------------------------------------------------
# 'age_at_exit' is passed as the duration column (cf. duration_col in the
# subset test code above).
lf = to_long_format(df, 'age_at_exit')

# Piped version (possibly faster?)
# lf = df.pipe(to_long_format, 'age_at_exit')\
#        .pipe(add_covariate_to_timeline(fund_cv_amt,
#                                        'id', 'time_to_funding', 'success',
#                                        cumulative_sum=False)\

# NOTE: WARNING -- the add_covariate_to_timeline call below is very slow.
# Add raised_amount_usd as time-varying covariate
# Only the id, covariate value and covariate time columns are selected.
fund_cv_amt = fund_cv[['id', 'raised_amount_usd', 'time_to_funding']]
lf = add_covariate_to_timeline(lf,
                               fund_cv_amt,
                               'id',
                               'time_to_funding',
                               'success',
Esempio n. 10
0
    [
        [1, 3, True, 1],
        [6, 4, False, 0],
        [3, 5, True, 1],
        [2, 5, False, 1],
        [4, 6, True, 1],
        [7, 7, True, 0],
        [8, 8, False, 0],
        [5, 8, False, 1],
        [9, 9, True, 0],
        [10, 10, True, 0],
    ],
    columns=["id", "time", "event", "group"],
)

# Rebind df to the result of to_long_format, passing "time" as the column name.
# NOTE(review): presumably this is lifelines.utils.to_long_format -- confirm.
df = to_long_format(df, "time")

cv = pd.DataFrame.from_records([
    {
        "id": 1,
        "z": 0,
        "time": 0
    },
    {
        "id": 6,
        "z": 1,
        "time": 0
    },
    {
        "id": 3,
        "z": 1,