def predict(self, neighbor_idxs, Survival_train, Censored_train,
            Survival_test=None, Censored_test=None,
            K=30, Method="cumulative-time"):
    """
    Predict testing set using 'prototype' (i.e. training) set using KNN.

    neighbor_idxs - indices of nearest neighbors; (N_test, N_train),
                    array-like of ints. Only the first K columns are used.
    Survival_train - training sample time-to-event; (N,) array-like
    Censored_train - training sample censorship status; (N,) array-like,
                     where 1 means censored
    Survival_test - testing sample time-to-event (optional). When given,
                    a c-index is computed for the predictions.
    Censored_test - testing sample censorship status; must be given
                    whenever Survival_test is given.
    K - number of nearest-neighbours to use, int
    Method - one of:
             'non-cumulative'    - sum over time points of the fraction of
                                   neighbours with known status that are
                                   alive
             'cumulative-time'   - area under the curve returned by
                                   self._km_estimator
             'cumulative-hazard' - area under the curve returned by
                                   self._na_estimator (scored as a risk)

    Returns (T_test, Ci):
        T_test - (N_test,) float array of predictions
        Ci - value returned by sUtils.c_index, or 0 when Survival_test
             is not provided

    Raises ValueError if Method is not one of the values above.
    """
    if Method not in ('non-cumulative', 'cumulative-time',
                      'cumulative-hazard'):
        raise ValueError("Method not implemented.")

    # Accept any array-like (lists, pandas Series, ...): everything below
    # relies on NumPy positional / integer-array indexing.
    Survival_train = np.asarray(Survival_train)
    Censored_train = np.asarray(Censored_train)

    # Keep only desired K
    neighbor_idxs = np.asarray(neighbor_idxs)[:, 0:K]

    # Initialize
    N_test = neighbor_idxs.shape[0]
    T_test = np.zeros([N_test])

    if Method == 'non-cumulative':

        # Convert outcomes to "alive status" at each time point
        alive_train = sUtils.getAliveStatus(Survival_train, Censored_train)

        # Get survival prediction for each patient
        for idx in range(N_test):

            # Integer-array indexing of a NumPy array yields a copy, so
            # the in-place zeroing below leaves alive_train untouched.
            status = alive_train[neighbor_idxs[idx, :], :]

            # Negative entries are treated as "status unknown"
            totalKnown = np.sum(status >= 0, axis=0)
            status[status < 0] = 0

            # remove timepoints where there are no known statuses
            # (i.e. after last neighbor dies or gets censored)
            has_known = totalKnown != 0
            status = status[:, has_known]
            totalKnown = totalKnown[has_known]

            # get "average" predicted survival time
            status = np.sum(status, axis=0) / totalKnown

            # now get overall time prediction
            T_test[idx] = np.sum(status)

    else:
        use_time = Method == 'cumulative-time'

        # iterate through patients
        for idx in range(N_test):

            # Get time and censorship of this patient's neighbours
            T = Survival_train[neighbor_idxs[idx, :]]
            C = Censored_train[neighbor_idxs[idx, :]]

            if C.min() == 1:
                # All cases are censored: no event to build a curve from
                T_test[idx] = T.max() if use_time else 0
                continue

            # KM estimator for survival time, NA estimator for hazard
            estimator = self._km_estimator if use_time \
                else self._na_estimator
            t, f = estimator(T, C)

            # Integral under the step curve (mean survival time for KM,
            # area under the cumulative hazard for NA)
            T_test[idx] = np.sum(np.diff(t) * f[0:-1])

    # Get c-index
    Ci = 0
    if Survival_test is not None:
        assert Censored_test is not None, \
            "Censored_test is required when Survival_test is given."
        if Method == "cumulative-hazard":
            prediction_type = "risk"
        else:
            prediction_type = "survival_time"
        Ci = sUtils.c_index(T_test, Survival_test, Censored_test,
                            prediction_type=prediction_type)

    return T_test, Ci
def predict(self, neighbor_idxs, Survival_train, Censored_train,
            Survival_test=None, Censored_test=None,
            K=15, Method='non-cumulative'):
    """
    Predict testing set using 'prototype' (i.e. training) set using KNN.

    neighbor_idxs - indices of nearest neighbors; (N_test, N_train),
                    array-like of ints. Only the first K columns are used.
    Survival_train - training sample time-to-event; (N,) array-like
    Censored_train - training sample censorship status; (N,) array-like
                     (observed-event indicator is taken as 1 - censored)
    Survival_test - testing sample time-to-event (optional). When given,
                    a c-index is computed for the predictions.
    Censored_test - testing sample censorship status; must be given
                    whenever Survival_test is given.
    K - number of nearest-neighbours to use, int
    Method - cumulative vs non-cumulative probability:
             'non-cumulative' - sum over time points of the fraction of
                                neighbours with known status that are alive
             'cumulative'     - sum of the cumulative product of
                                (at risk - events) / at risk

    Returns (T_test, CI):
        T_test - (N_test,) float array of predictions
        CI - value returned by sUtils.c_index, or 0 when Survival_test
             is not provided

    Raises ValueError if Method is neither 'cumulative' nor
    'non-cumulative'.
    """
    if Method not in ('non-cumulative', 'cumulative'):
        raise ValueError(
            "Method is either 'cumulative' or 'non-cumulative'.")

    # Accept any array-like (lists, pandas Series, ...): everything below
    # relies on NumPy positional / integer-array indexing.
    Survival_train = np.asarray(Survival_train)
    Censored_train = np.asarray(Censored_train)

    # Keep only desired K
    neighbor_idxs = np.asarray(neighbor_idxs)[:, 0:K]

    # Initialize
    N_test = neighbor_idxs.shape[0]
    T_test = np.zeros([N_test])

    # Number of neighbours actually available. The slice above silently
    # yields fewer than K columns when fewer were supplied, so at-risk
    # counts must be based on this value rather than on K itself.
    n_neighbors = neighbor_idxs.shape[1]

    if Method == 'non-cumulative':

        # Convert outcomes to "alive status" at each time point
        alive_train = sUtils.getAliveStatus(Survival_train, Censored_train)

        # Get survival prediction for each patient
        for idx in range(N_test):

            # Integer-array indexing of a NumPy array yields a copy, so
            # the in-place zeroing below leaves alive_train untouched.
            status = alive_train[neighbor_idxs[idx, :], :]

            # Negative entries are treated as "status unknown"
            totalKnown = np.sum(status >= 0, axis=0)
            status[status < 0] = 0

            # remove timepoints where there are no known statuses
            has_known = totalKnown != 0
            status = status[:, has_known]
            totalKnown = totalKnown[has_known]

            # get "average" predicted survival time
            status = np.sum(status, axis=0) / totalKnown

            # now get overall time prediction
            T_test[idx] = np.sum(status)

    else:

        for idx in range(N_test):

            # Get at-risk groups for each time point for nearest neighbors
            T = Survival_train[neighbor_idxs[idx, :]]
            O = 1 - Censored_train[neighbor_idxs[idx, :]]
            # NOTE(review): assumes calc_at_risk returns, for each sample
            # in its output order, the position at which that sample's
            # at-risk group starts -- confirm against sUtils.
            T, O, at_risk, _ = sUtils.calc_at_risk(T, O)

            N_at_risk = n_neighbors - at_risk

            # Calculate cumulative probability of survival
            P = np.cumprod((N_at_risk - O) / N_at_risk)

            # now get overall time prediction
            T_test[idx] = np.sum(P)

    # Get c-index
    CI = 0
    if Survival_test is not None:
        assert Censored_test is not None, \
            "Censored_test is required when Survival_test is given."
        CI = sUtils.c_index(T_test, Survival_test, Censored_test,
                            prediction_type='survival_time')

    return T_test, CI
if np.min(Data['Survival']) < 0: Data['Survival'] = Data['Survival'] - np.min(Data['Survival']) + 1 Survival = np.int32(Data['Survival']) Censored = np.int32(Data['Censored']) #fnames = Data['Integ_Symbs'] fnames = Data['Gene_Symbs'] # remove zero-variance features fvars = np.std(data, 0) keep = fvars > 0 data = data[:, keep] fnames = fnames[keep] # Generate survival status - discretized into months aliveStatus = sUtils.getAliveStatus(Survival, Censored, scale=30) #============================================================================ # train a survival NCA model #============================================================================== ncaParams = { 'LOADPATH': None, #"/home/mohamed/Desktop/CooperLab_Research/KNN_Survival/Results/tmp/GBMLGG_Integ_ModelAttributes.txt", 'RESULTPATH': "/home/mohamed/Desktop/CooperLab_Research/KNN_Survival/Results/tmp/", 'description': "GBMLGG_Gene_", 'SIGMA': 1, 'LAMBDA': 0, 'LEARN_RATE': 0.01, 'MONITOR_STEP': 1,