# Example #1

import datetime
import itertools

import numpy as np
from sklearn.model_selection import RepeatedKFold

import deepsurvk


def optimize_hp(X,
                Y,
                E,
                mode='grid',
                n_splits=3,
                n_repeats=5,
                verbose=True,
                **params):
    """
    Optimize the (hyper)parameters of a DeepSurvK model using 
    cross-validation.
    
    Parameters
    ----------
    X: pandas DataFrame
        Data
    Y: pandas DataFrame
        It needs to have column 'T'
    E: pandas DataFrame
        It needs to have column 'E'
    mode: string
        Possible values are:
            'grid' (default)
            'random' TODO
    n_splits: int (optional)
        Number of folds. Default value is 3, as suggested in [1].
    n_repeats: int (optional)
        Number of CV repetitions. Default value is 5.
    verbose: boolean (optional)
        Whether verbose output is desired (True, default) or not (False).
    params: dictionary
        Each key corresponds to a parameter.
        Each value is a list of values to be explored for that parameter.

        The number of epochs can also be defined here, as an entry with
        key `epochs` whose value is a single-element list. If the list
        has more than one element, only the first one is considered. If
        the number of epochs isn't defined by the user, a default of
        1000 is used.
            
    Returns
    -------
    best_params: dictionary
        Best parameters.
        Each key corresponds to a parameter. 
        The values correspond to the optimized parameter.
        
    References
    ----------
    [1] Katzman, Jared L., et al. "DeepSurv: personalized treatment recommender system using a Cox proportional hazards deep neural network." BMC medical research methodology 18.1 (2018): 24.
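
    Examples
    --------
    A minimal sketch, assuming `X`, `Y`, and `E` are DataFrames shaped
    as described above:

    >>> params = {'n_layers': [1, 2],
    ...           'n_nodes': [8, 16],
    ...           'epochs': [500]}
    >>> best_params = optimize_hp(X, Y, E, **params)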
    """

    # Check if the number of epochs was defined.
    if 'epochs' in params:
        # If yes, extract its value and remove it from the dictionary
        # (since it won't be optimized).
        epochs = params.pop('epochs')[0]
    else:
        # If not, set a default value of 1000.
        epochs = 1000

    # Generate a list of dictionaries with all possible combinations.
    # Trick from https://stackoverflow.com/a/61335465/948768
    keys, values = zip(*params.items())
    params_list = [dict(zip(keys, v)) for v in itertools.product(*values)]
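    # For example, {'n_nodes': [8, 16], 'dropout': [0.1]} expands to
    # [{'n_nodes': 8, 'dropout': 0.1}, {'n_nodes': 16, 'dropout': 0.1}].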

    # Compute important parameters.
    n_features = X.shape[1]
    n_combinations = len(params_list)
    if verbose:
        print(f"Optimizing {n_combinations} parameter combinations.")

    if verbose:
        started_at = datetime.datetime.now().replace(microsecond=0)
        print("Optimization started at: ", end='', flush=True)
        print(started_at.strftime("%Y-%m-%d %H:%M:%S"))

    # Initialize important variables.
    c_index_mean = []
    c_index_std = []

    # Loop through all possible parameter combinations.
    for ii, params_curr in enumerate(params_list):

        if verbose:
            print(f"Parameter set {ii+1}/{n_combinations}...")
            print(params_curr)

        # Create RepeatedKFold object.
        rkf = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats)
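        # Each parameter set is fitted n_splits * n_repeats times, so the
        # whole search performs n_combinations * n_splits * n_repeats fits.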

        # To store results.
        c_index_param = []

        # Loop through different data partitions.
        for jj, (train_index, val_index) in enumerate(rkf.split(X, Y)):

            if verbose:
                print(f"\tIteration {jj+1}/{n_splits*n_repeats}...",
                      end='',
                      flush=True)

            # Perform data partition.
            X_train, X_val = X.iloc[train_index, :], X.iloc[val_index, :]
            Y_train, Y_val = Y.iloc[train_index, :], Y.iloc[val_index, :]
            E_train, E_val = E.iloc[train_index, :], E.iloc[val_index, :]

            # Create DSK model (with current loop's parameters)
            dsk = deepsurvk.DeepSurvK(n_features=n_features,
                                      E=E_train,
                                      **params_curr)
            loss = deepsurvk.negative_log_likelihood(E_train)
            dsk.compile(loss=loss)
            callbacks = deepsurvk.common_callbacks()

            # Fit model.
            n_patients_train = X_train.shape[0]
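            # The Cox partial-likelihood loss is computed over the whole
            # risk set, so training uses a single full batch and no
            # shuffling.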
            dsk.fit(X_train,
                    Y_train,
                    batch_size=n_patients_train,
                    epochs=epochs,
                    callbacks=callbacks,
                    shuffle=False)

            # Generate predictions.
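            # The network outputs the log-risk h(x); exp(-h) decreases
            # with risk, so higher values correspond to longer expected
            # survival, which is the convention concordance_index expects.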
            Y_pred_val = np.exp(-dsk.predict(X_val))

            # Compute quality metric (c-index)
            c = deepsurvk.concordance_index(Y_val, Y_pred_val, E_val)
            c_index_param.append(c)

            if verbose:
                print(f"\tc-index = {c}")

        # Calculate c-index mean and STD for current parameter set.
        c_index_mean.append(np.nanmean(c_index_param))
        c_index_std.append(np.nanstd(c_index_param))

    if verbose:
        ended_at = datetime.datetime.now().replace(microsecond=0)
        print("Optimization ended at: ", end='', flush=True)
        print(ended_at.strftime("%Y-%m-%d %H:%M:%S"))
        print(f"Optimization took {ended_at-started_at}")

    # Find parameter combination with highest c-index.
    c_index_mean_max = max(c_index_mean)
    idx = c_index_mean.index(c_index_mean_max)

    best_params = params_list[idx]

    return best_params
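
#%%
# The cells below assume the usual example setup from earlier in the
# script: a standardized training DataFrame X_train, survival times
# Y_train, and event indicators E_train (these names are assumptions
# here, not defined in this snippet).
n_patients_train = X_train.shape[0]
n_features = X_train.shape[1]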

#%%
# Model (hyper)parameters.
params = {'n_layers': 1,
          'n_nodes': 8,
          'activation': 'selu',
          'learning_rate': 0.154,
          'decays': 5.667e-3,
          'momentum': 0.887,
          'l2_reg': 6.551,
          'dropout': 0.661,
          'optimizer': 'nadam'}

#%%
dsk = deepsurvk.DeepSurvK(n_features=n_features, E=E_train, **params)

#%%
loss = deepsurvk.negative_log_likelihood(E_train)
dsk.compile(loss=loss)

#%%
callbacks = deepsurvk.common_callbacks()

epochs = 1000
history = dsk.fit(X_train,
                  Y_train,
                  batch_size=n_patients_train,
                  epochs=epochs,
                  callbacks=callbacks,
                  shuffle=False)

#%%
deepsurvk.plot_loss(history)
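
#%%
# Generate predictions and evaluate them, mirroring the validation step
# in optimize_hp above; X_val, Y_val, and E_val are assumed to be a
# held-out split prepared like the training data.
Y_pred_val = np.exp(-dsk.predict(X_val))
c = deepsurvk.concordance_index(Y_val, Y_pred_val, E_val)
print(f"c-index = {c}")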