Esempio n. 1
0
    # data.loc[(data.ttocvd <= TARGET_END) & (data.cvd == 0), 'label'] = 2

    # Reformat `data` with the project helper, passing the label/outcome
    # columns plus BASE_COVS and INDICATORS, then MARKERS and TRUNCATE_TIME.
    # (The helper's exact semantics are not visible from this fragment.)
    data = data_short_formatting(data, ['label', 'cvd', 'ttocvd'] + BASE_COVS +
                                 INDICATORS, MARKERS, TRUNCATE_TIME)
    # Features are selected positionally: every column except the first four
    # and the last three.
    # NOTE(review): this depends on the column order produced by
    # data_short_formatting — confirm.
    FEATURE_LIST = data.columns[4:-3]
    # NOTE(review): the split fraction is passed positionally, which does not
    # match scikit-learn's train_test_split signature; presumably this is a
    # project-local splitter holding out 30% — confirm.
    train_set, test_set = train_test_split(data, .3)

    # Fit a logistic regression on the training features against 'label'.
    clf = LogisticRegression(solver='lbfgs').fit(
        train_set.loc[:, FEATURE_LIST].values, train_set.loc[:,
                                                             'label'].values)

    # Two-column array, one row per test sample:
    # (column 1 of predict_proba, 'label').
    result = np.vstack(
        (clf.predict_proba(test_set.loc[:, FEATURE_LIST].values)[:, 1],
         test_set.loc[:, 'label'])).T
    # Fraction of test rows whose label equals 1.
    print("death ratio",
          sum(test_set.loc[:, 'label'] == 1) / test_set.shape[0])

    # Classifier score on the held-out set.
    print(
        "accuracy",
        clf.score(test_set.loc[:, FEATURE_LIST].values,
                  test_set.loc[:, 'label'].values))

    # auc_jm is called with (label, ttocvd, column-1 probability, TARGET_END);
    # its definition is outside this fragment.
    print(
        "aucJM",
        auc_jm(
            torch.Tensor(test_set.loc[:, 'label'].values),
            torch.Tensor(test_set.loc[:, 'ttocvd'].values),
            torch.Tensor(
                clf.predict_proba(test_set.loc[:, FEATURE_LIST].values)[:, 1]),
            TARGET_END))
Esempio n. 2
0
    # NOTE(review): the split fraction is passed positionally; presumably a
    # project-local splitter holding out 30% — confirm.
    train_set, test_set = train_test_split(data, .3)

    # Training features as a FloatTensor and training labels as a LongTensor.
    x_train, label_train = \
        torch.from_numpy(train_set.loc[:, FEATURE_LIST].values).type(torch.FloatTensor), \
        torch.from_numpy(train_set['label'].values).type(torch.LongTensor)

    # Same conversion for the held-out set.
    x_test, label_test = \
        torch.from_numpy(test_set.loc[:, FEATURE_LIST].values).type(torch.FloatTensor), \
        torch.from_numpy(test_set['label'].values).type(torch.LongTensor)

    # Losses returned by train()/test() are accumulated across epochs.
    train_loss = []
    test_loss = []
    for epoch in range(n_epochs):
        print("*************** new epoch ******************")
        # Evaluate before this epoch's training step: auc_jm receives the
        # 'cvd' and 'ttocvd' columns, column 1 of the model output, and
        # TARGET_TIME (auc_jm is defined outside this fragment).
        auc_test = auc_jm(
            torch.from_numpy(test_set['cvd'].values).type(torch.IntTensor),
            torch.from_numpy(test_set['ttocvd'].values).type(torch.IntTensor),
            model(x_test)[:, 1], TARGET_TIME)
        print("ten year auc:", auc_test)

        # Predicted class is 1 when exp(output)[:, 1] exceeds 0.5.
        # NOTE(review): the exp() suggests the model emits log-probabilities —
        # confirm against the model definition.
        label_pred = torch.exp(model(x_test))[:, 1].data > .5
        label_true = test_set['label'].values
        # Accuracy = number of matching predictions / number of test labels.
        print("accuracy",
              sum(label_pred.numpy() == label_true) * 1.0 / len(label_true))

        # NOTE(review): scheduler.step() runs before train() in each epoch;
        # PyTorch >= 1.1 expects it after the optimizer update — confirm
        # against the PyTorch version this script targets.
        scheduler.step()
        for param_group in optimizer.param_groups:
            print("learning rate:", param_group['lr'])
        # train()/test() results are concatenated onto the running lists.
        train_loss = train_loss + train(batch_size=batch_size)
        test_loss = test_loss + test(batch_size=batch_size)

        # param_change is given the stored `param` and the current model
        # (both defined outside this fragment).
        print("parameter change:", param_change(param, model))
        torch.from_numpy(test_set.loc[:, FEATURE_LIST].values).type(torch.FloatTensor), \
        torch.from_numpy(test_set['label'].values).type(torch.LongTensor), \
        torch.from_numpy(test_set['cvd'].values).type(torch.LongTensor)

    # Losses returned by train()/test() are accumulated across epochs.
    train_loss = []
    test_loss = []
    # The 'cvd' and 'ttocvd' columns of the test set as IntTensors, built once
    # and reused in every epoch.
    test_event = torch.from_numpy(test_set['cvd'].values).type(torch.IntTensor)
    test_time = torch.from_numpy(test_set['ttocvd'].values).type(
        torch.IntTensor)
    for epoch in range(n_epochs):
        print("*************** new epoch ******************")
        # Running complement-product along dim 1: 1 - prod(1 - output[:, :k+1]).
        # NOTE(review): presumably the model outputs per-interval event
        # probabilities, making `pred` a cumulative event probability — confirm.
        pred = 1 - torch.cumprod(1 - model(x_test), dim=1)
        # Mean raw model output per column.
        print(torch.mean(model(x_test), dim=0))

        # Columns 0..3 of `pred` are scored at horizons 20, 25, 30 and 40.
        auc_test = [
            auc_jm(test_event, test_time, pred[:, 0], 20),
            auc_jm(test_event, test_time, pred[:, 1], 25),
            auc_jm(test_event, test_time, pred[:, 2], 30),
            auc_jm(test_event, test_time, pred[:, 3], 40),
        ]
        # NOTE(review): the printed label says "ten year" but the list holds
        # four horizons.
        print("ten year auc:", auc_test)

        # NOTE(review): scheduler.step() runs before train() in each epoch;
        # PyTorch >= 1.1 expects it after the optimizer update — confirm
        # against the PyTorch version this script targets.
        scheduler.step()
        for param_group in optimizer.param_groups:
            print("learning rate:", param_group['lr'])
        # train()/test() results are concatenated onto the running lists.
        train_loss = train_loss + train(batch_size=batch_size)
        test_loss = test_loss + test(batch_size=batch_size)

        print("parameter change:", param_change(param, model))
        # Snapshot the current weights so the next epoch compares against them.
        param = deepcopy(model.state_dict())
            torch.tensor(round(cox_complex_res.event_time)),
            torch.tensor(cox_complex_res.hazard_ratio)))

# Fitted joint-model (JM) outputs: a "simple" and a "complex" result table,
# each read from a CSV file under DIR.
jm_simple_res, jm_complex_res = (
    pd.read_csv(os.path.join(DIR, _csv_name), delimiter=',')
    for _csv_name in ('jm_simple_res.csv', 'jm_complex_res.csv'))
# print(pd.concat([jm_simple_res, cox_simple_res['hazard_ratio']], axis=1))

# (printed tag, result table) pairs, in the order they are reported.
_JM_FITS = (("simple", jm_simple_res), ("complex", jm_complex_res))

for horizon in [20, 25, 30]:
    print("horizon is", horizon)
    # Each table stores its prediction for a horizon in a column named after it.
    _horizon_col = str(horizon)

    # fitted result from JM package is survival probability, so the value
    # printed is 1 - auc_jm(...)
    for _tag, _fit in _JM_FITS:
        _auc = auc_jm(torch.tensor(_fit.event),
                      torch.tensor(round(_fit.event_time)),
                      torch.tensor(_fit[_horizon_col]), horizon)
        print(_tag, 1 - _auc)

    # c-index of (1 - survival probability), simple table first, then complex.
    for _tag, _fit in _JM_FITS:
        _risk = 1 - torch.tensor(_fit[_horizon_col])
        print(
            c_index(torch.tensor(_fit.event),
                    torch.tensor(round(_fit.event_time)), _risk))
Esempio n. 5
0
        torch.from_numpy(test_set.loc[:, FEATURE_LIST].values).type(torch.FloatTensor), \
        torch.from_numpy(test_set['label'].values).type(torch.LongTensor), \
        torch.from_numpy(test_set['cvd'].values).type(torch.LongTensor)

    # Losses are collected across epochs (the accumulation itself falls
    # outside this fragment).
    train_loss = []
    test_loss = []
    # The 'cvd' and 'ttocvd' columns of the test set as IntTensors, built once
    # and reused in every epoch.
    test_event = torch.from_numpy(test_set['cvd'].values).type(torch.IntTensor)
    test_time = torch.from_numpy(test_set['ttocvd'].values).type(torch.IntTensor)
    for epoch in range(n_epochs):
        print("*************** new epoch ******************")
        # Running complement-product along dim 1: 1 - prod(1 - output[:, :k+1]).
        # NOTE(review): presumably the model outputs per-interval event
        # probabilities, making `pred` a cumulative event probability — confirm.
        pred = 1 - torch.cumprod(1 - model(x_test), dim=1)
        # print(torch.mean(model(x_test), dim=0))
        # Mean of `pred` over the test set at the four evaluated columns.
        print(torch.mean(pred, dim=0)[[6, 11, 16, 26]])

        # Columns 6, 11, 16 and 26 of `pred` are scored at horizons 20, 25, 30
        # and 40.
        # NOTE(review): this looks like a fixed offset of 14 between column
        # index and horizon — confirm against the model's time grid.
        auc_test = [
            auc_jm(test_event, test_time, pred[:, 6], 20),
            auc_jm(test_event, test_time, pred[:, 11], 25),
            auc_jm(test_event, test_time, pred[:, 16], 30),
            auc_jm(test_event, test_time, pred[:, 26], 40),
        ]
        # NOTE(review): the printed label says "ten year" but the list holds
        # four horizons.
        print("ten year auc:", auc_test)
        # c_index takes no horizon argument; it is evaluated on the same four
        # columns of `pred`.
        cindex = [
            c_index(test_event, test_time, pred[:, 6]),
            c_index(test_event, test_time, pred[:, 11]),
            c_index(test_event, test_time, pred[:, 16]),
            c_index(test_event, test_time, pred[:, 26]),
        ]
        print("c index:", cindex)

        # NOTE(review): scheduler.step() runs before any training call visible
        # in this epoch; PyTorch >= 1.1 expects it after the optimizer update —
        # confirm against the PyTorch version this script targets.
        scheduler.step()
        for param_group in optimizer.param_groups: