Code Example #1
File: process.py Project: iamysk/SurvCNN
    def input_process2(self, training_list, clinical):
        print("Data processing-II...")
        sample, t, f, age = [], [], [], []

        # Match each training image (named '<sample>.png') to its clinical row.
        for fname in tqdm(training_list):  # renamed to avoid shadowing the builtin 'list'
            for i in range(len(clinical)):
                if clinical.iloc[i]['sample'] + '.png' == str(fname):
                    sample.append(clinical.iloc[i]['sample'])
                    t.append(clinical.iloc[i]['os_time'])
                    f.append(clinical.iloc[i]['vital_status'])
                    age.append(clinical.iloc[i]['age'])
                    break  # assumes sample IDs are unique
        t = np.asarray(t)
        f = np.asarray(f)
        sample = np.asarray(sample)
        age = np.asarray(age)

        br = np.arange(0., 365. * 10, 365. / 4)  # quarterly breaks over 10 years
        nl = len(br) - 1                         # number of intervals
        y_t = nnet_survival.make_surv_array(t, f, br)
        ind = range(len(f))                      # sample indices
        print('Done!')

        if self.omics == 'mrna':
            rand_range = [1, 2]
        elif self.omics == 'meth':
            rand_range = [3, 4]
        elif self.omics == 'mirna':
            rand_range = [4, 5]
        elif self.omics == 'mrna_meth':
            rand_range = [6, 7]
        elif self.omics == 'mrna_mirna':
            rand_range = [8, 9]
        elif self.omics == 'mrna_meth_mirna':
            rand_range = [10, 11]
        else:
            raise ValueError('Unknown omics type: ' + str(self.omics))

        return t, f, sample, age, br, nl, y_t, ind, rand_range
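
A note on Example #1: the nested loop above rescans the full clinical table for every image, which is O(len(training_list) * len(clinical)). A minimal equivalent sketch using an indexed pandas lookup (assuming the same 'sample', 'os_time', 'vital_status' and 'age' columns and the numpy/pandas imports used by the surrounding code; 'clin' and 'ids' are illustrative names):

    clin = clinical.set_index('sample')
    ids = [str(name)[:-4] for name in training_list
           if str(name).endswith('.png') and str(name)[:-4] in clin.index]
    sample = np.asarray(ids)
    t = clin.loc[ids, 'os_time'].to_numpy()
    f = clin.loc[ids, 'vital_status'].to_numpy()
    age = clin.loc[ids, 'age'].to_numpy()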
Code Example #2
# Assumed context for this snippet: numpy as np, nnet_survival, keras'
# EarlyStopping and lifelines' concordance_index are imported; cph is a fitted
# lifelines CoxPHFitter and data_train/data_test are pandas DataFrames.
#Cox model discrimination on the test set
prediction = cph.predict_partial_hazard(data_test)
print(concordance_index(data_test.time, -prediction, data_test.dead))  #0.735

################################
#Nnet-survival / Our model (flexible version to
#allow non-proportional hazards)

halflife = 365. * 1.4
breaks = -np.log(1 - np.arange(0.0, 0.96, 0.05)) * halflife / np.log(2)
#breaks=-np.log(1-np.arange(0.0,1,0.099))*halflife/np.log(2)
n_intervals = len(breaks) - 1
timegap = breaks[1:] - breaks[:-1]
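
#Note: these breaks are the quantiles of an exponential survival curve with
#the given half-life. Since S(t) = 2**(-t / halflife), solving S(t) = 1 - q
#gives t = -log(1 - q) * halflife / log(2), so each interval is expected to
#contain roughly an equal share of events. A quick illustrative check
#(q is a demo name):
q = np.arange(0.0, 0.96, 0.05)
assert np.allclose(1 - 2 ** (-breaks / halflife), q)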

y_train = nnet_survival.make_surv_array(data_train.time.values,
                                        data_train.dead.values, breaks)
y_test = nnet_survival.make_surv_array(data_test.time.values,
                                       data_test.dead.values, breaks)

hidden_layers_sizes = 7  #Using a single hidden layer with this many neurons

##############################################################
#Our model cross-validation to pick L2 regularization strength

from sklearn.model_selection import KFold
n_folds = 10
kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
early_stopping = EarlyStopping(monitor='loss', patience=20)

#l2_array = np.concatenate(([0.],np.power(10.,np.arange(-6,-2))))
l2_array = np.power(10., np.arange(-4, 1))
Code Example #3
# Assumed context: numpy as np, pandas as pd, matplotlib.pyplot as plt,
# nnet_survival, keras (Sequential, Dense, optimizers, EarlyStopping) and
# lifelines (KaplanMeierFitter, CoxPHFitter) are imported.
def binary_ANN_survival():
    breaks = np.arange(0, 5000, 50)
    n_intervals = len(breaks) - 1
    timegap = breaks[1:] - breaks[:-1]

    halflife1 = 200
    halflife2 = 400
    halflife_cens = 400
    n_samples = 5000
    np.random.seed(seed=0)
    t1 = np.random.exponential(scale=1 / (np.log(2) / halflife1),
                               size=int(n_samples / 2))
    t2 = np.random.exponential(scale=1 / (np.log(2) / halflife2),
                               size=int(n_samples / 2))
    t = np.concatenate((t1, t2))
    censtime = np.random.exponential(scale=1 / (np.log(2) / (halflife_cens)),
                                     size=n_samples)
    f = t < censtime      # event observed only if it occurs before censoring
    t[~f] = censtime[~f]  # censored subjects: follow-up ends at censoring time

    y_train = nnet_survival.make_surv_array(t, f, breaks)
    x_train = np.zeros(n_samples)
    x_train[int(n_samples / 2):] = 1

    model = Sequential()
    # Hidden layers would go here. For this example, using simple linear model with no hidden layers.
    model.add(Dense(1, input_dim=1, use_bias=False, kernel_initializer='zeros'))
    model.add(nnet_survival.PropHazards(n_intervals))
    model.compile(loss=nnet_survival.surv_likelihood(n_intervals),
                  optimizer=optimizers.RMSprop())
    # model.summary()
    early_stopping = EarlyStopping(monitor='loss', patience=2)
    history = model.fit(x_train,
                        y_train,
                        batch_size=32,
                        epochs=1000,
                        callbacks=[early_stopping])
    y_pred = model.predict(x_train, verbose=0)  # predict_proba was removed from modern Keras

    kmf = KaplanMeierFitter()
    kmf.fit(t[0:int(n_samples / 2)], event_observed=f[0:int(n_samples / 2)])
    plt.plot(breaks, np.concatenate(([1], np.cumprod(y_pred[0, :]))), 'bo-')
    plt.plot(kmf.survival_function_.index.values,
             kmf.survival_function_.KM_estimate,
             color='k')
    kmf.fit(t[int(n_samples / 2):],
            event_observed=f[int(n_samples / 2):])  # second group (x_train = 1)
    plt.plot(breaks, np.concatenate(([1], np.cumprod(y_pred[-1, :]))), 'ro-')
    plt.plot(kmf.survival_function_.index.values,
             kmf.survival_function_.KM_estimate,
             color='k')
    plt.xticks(np.arange(0, 2000.0001, 200))
    plt.yticks(np.arange(0, 1.0001, 0.125))
    plt.xlim([0, 2000])
    plt.ylim([0, 1])
    plt.xlabel('Follow-up time (days)')
    plt.ylabel('Proportion surviving')
    plt.title('One covariate. Actual=black, predicted=blue/red.')
    plt.show()

    myData = pd.DataFrame({'x_train': x_train, 't': t, 'f': f})
    cf = CoxPHFitter()
    cf.fit(myData, 't', event_col='f')
    # In newer lifelines versions the coefficient lives in cf.params_:
    # cox_coef = cf.params_['x_train']
    cox_coef = cf.hazards_.x_train
    nn_coef = model.get_weights()[0][0][0]
    print('Cox model coefficient:')
    print(cox_coef)
    print('Cox model hazard ratio:')
    print(np.exp(cox_coef))
    print('Neural network coefficient:')
    print(nn_coef)
    print('Neural network hazard ratio:')
    print(np.exp(nn_coef))
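
    # Illustrative sanity check (true_coef is a demo name): with exponential
    # event times the hazard is log(2)/halflife, so the true hazard ratio for
    # x_train=1 vs. x_train=0 is halflife1 / halflife2 = 0.5.
    true_coef = np.log(halflife1 / halflife2)
    print('True coefficient (log hazard ratio):')
    print(true_coef)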
Code Example #4
# 'halflife' is assumed to be defined earlier (e.g. halflife = 365. * 1.4 as
# in Code Example #2); these lines repeat the quantile-based breaks setup.
breaks = -np.log(1 - np.arange(0.0, 0.96, 0.05)) * halflife / np.log(2)
n_intervals = len(breaks) - 1
timegap = breaks[1:] - breaks[:-1]

##################################################################
#Flexible model (non-proportional hazards).
#All patients share the same exponential survival distribution, no censoring.
#Not described in paper.

halflife1 = 365.
n_samples = 1000
np.random.seed(seed=0)
t = np.random.exponential(scale=1 / (np.log(2) / halflife1), size=n_samples)
f = np.ones(n_samples)  #all patients failed (none censored)
#y_train=nnet_survival.make_surv_array(t,f)
y_train = nnet_survival.make_surv_array(t, f, breaks)
x_train = np.zeros(n_samples)

model = Sequential()
#Hidden layers would go here. For this example, using simple linear model with no hidden layers.
model.add(
    Dense(n_intervals,
          input_dim=1,
          kernel_initializer='zeros',
          bias_initializer='zeros'))
model.add(Activation('sigmoid'))
model.compile(loss=nnet_survival.surv_likelihood(n_intervals),
              optimizer=optimizers.RMSprop())
#model.summary()
early_stopping = EarlyStopping(monitor='loss', patience=2)
#The fit call was truncated in the source snippet; completed here following
#the pattern of the other examples (batch size and epoch count assumed).
history = model.fit(x_train,
                    y_train,
                    batch_size=32,
                    epochs=1000,
                    callbacks=[early_stopping])
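
#Illustrative check: with all-zero inputs, each sigmoid output should approach
#the true conditional survival probability for its interval,
#exp(-log(2) * timegap / halflife1).
y_pred = model.predict(x_train[:1], verbose=0)
print(y_pred[0])
print(np.exp(-np.log(2) * timegap / halflife1))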
Code Example #5
def ANN_survival_model():
    #################################################################
    print('-------------------------------------------------------------')
    print('start cross-validation to pick L2 regularization strength for training')
    print('-------------------------------------------------------------')
    halflife = 365. * 2.8

    breaks = -np.log(1 - np.arange(0.0, 0.96, 0.05)) * halflife / np.log(2)
    n_intervals = len(breaks) - 1
    timegap = breaks[1:] - breaks[:-1]

    # y_train = nnet_survival.make_surv_array(data_train.time.values, data_train.dead.values, breaks)
    # y_test = nnet_survival.make_surv_array(data_test.time.values, data_test.dead.values, breaks)

    y_train = nnet_survival.make_surv_array(data_train.duration_d.values,
                                            data_train.CVD.values, breaks)
    y_test = nnet_survival.make_surv_array(data_test.duration_d.values,
                                           data_test.CVD.values, breaks)

    # Distinguish uncensored from censored data:
    # for an uncensored subject, the second half of the target array has a 1
    # in the interval where death occurred; for a censored subject, the
    # second half is all zeros.
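    # Illustrative check of that encoding (row_unc/row_cen are demo names):
    row_unc = nnet_survival.make_surv_array(np.array([400.]), np.array([1]), breaks)[0]
    row_cen = nnet_survival.make_surv_array(np.array([400.]), np.array([0]), breaks)[0]
    print(row_unc[n_intervals:].sum(), row_cen[n_intervals:].sum())  # expect 1.0 and 0.0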
    hidden_layers_sizes = 7  # Using a single hidden layer with this many neurons

    from sklearn.model_selection import KFold

    n_folds = 10
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
    early_stopping = EarlyStopping(monitor='loss', patience=20)

    # l2_array = np.concatenate(([0.],np.power(10.,np.arange(-6,-2))))
    l2_array = np.power(10., np.arange(-4, 1))
    grid_search_train = np.zeros((len(l2_array), n_folds))
    grid_search_test = np.zeros((len(l2_array), n_folds))
    print('running 10-fold cross-validation over the L2 grid\n')
    # Note: only the first L2 value is evaluated here; restore the commented
    # loop below to sweep all values in l2_array.
    for i in range(1):
        # for i in range(len(l2_array)):
        print(str(i + 1) + ' / ' + str(len(l2_array)) + ' L2 values')
        j = 0
        cv_folds = kf.split(x_train)
        for traincv, testcv in cv_folds:
            x_train_cv = x_train[traincv]
            y_train_cv = y_train[traincv]
            x_test_cv = x_train[testcv]
            y_test_cv = y_train[testcv]

            # ReLU activation functions, sigmoid on the final layer, 7-unit hidden layer
            model = Sequential()
            # model.add(Dense(n_intervals,input_dim=x_train.shape[1],bias_initializer='zeros',kernel_regularizer=regularizers.l2(l2_array[i])))

            # The input layer width equals the number of input features
            model.add(
                Dense(hidden_layers_sizes,
                      input_dim=x_train.shape[1],
                      bias_initializer='zeros',
                      activation='relu',
                      kernel_regularizer=regularizers.l2(l2_array[i])))
            # model.add(Activation('relu'))
            model.add(Dense(n_intervals))
            model.add(Activation('sigmoid'))

            model.compile(loss=nnet_survival.surv_likelihood(n_intervals),
                          optimizer=optimizers.RMSprop())  # lr=0.0001))

            history = model.fit(x_train_cv,
                                y_train_cv,
                                batch_size=256,
                                epochs=100000,
                                callbacks=[early_stopping],
                                verbose=0)
            # model.summary()
            print(model.metrics_names)
            grid_search_train[i, j] = model.evaluate(x_train_cv,
                                                     y_train_cv,
                                                     verbose=0)
            print(grid_search_train[i, j])
            grid_search_test[i, j] = model.evaluate(x_test_cv,
                                                    y_test_cv,
                                                    verbose=0)
            print(grid_search_test[i, j])
            j = j + 1

    print(np.average(grid_search_train, axis=1))
    print(np.average(grid_search_test, axis=1))
    # Pick the L2 strength with the lowest average validation loss.
    l2_final = l2_array[np.argmin(np.average(grid_search_test, axis=1))]

    ############################### plot ######################################
    # Note: 'val_loss', 'acc' and 'val_acc' only appear in history if the model
    # was fit with validation data and an accuracy metric (e.g.
    # validation_split=... in fit and metrics=['acc'] in compile above).
    fig, loss_ax = plt.subplots()
    acc_ax = loss_ax.twinx()

    loss_ax.plot(history.history['loss'], 'y', label='train loss')
    loss_ax.plot(history.history['val_loss'], 'r', label='test loss')

    acc_ax.plot(history.history['acc'], 'b', label='train acc')
    acc_ax.plot(history.history['val_acc'], 'g', label='test acc')

    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    acc_ax.set_ylabel('accuracy')

    loss_ax.legend(loc='upper left')
    acc_ax.legend(loc='lower left')

    plt.show()
    ###########################################################################
    score = model.evaluate(x_test, y_test, batch_size=2, verbose=1)
    # print('Test loss: ', score[0])
    # print('Test accuracy: ', score[1])

    # Discrimination performance
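    # The network outputs one conditional survival probability per interval, so
    # the cumulative product of the columns up to the first break beyond 365
    # days gives each subject's predicted one-year survival probability.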
    y_pred = model.predict(x_train, verbose=1)  # predict_proba was removed from modern Keras
    oneyr_surv = np.cumprod(y_pred[:, 0:np.nonzero(breaks > 365)[0][0]],
                            axis=1)[:, -1]

    print('================================')
    print('Training data with concordance_index ')
    print(concordance_index(data_train.duration_d, oneyr_surv, data_train.CVD))
    print('================================')

    y_pred = model.predict(x_test, verbose=1)  # predict_proba was removed from modern Keras
    oneyr_surv = np.cumprod(y_pred[:, 0:np.nonzero(breaks > 365)[0][0]],
                            axis=1)[:, -1]

    print('================================')
    print('Test data with concordance_index ')
    print(concordance_index(data_test.duration_d, oneyr_surv, data_test.CVD))
    print('================================')