Esempio n. 1
0
    def predict(self, neighbor_idxs,
                Survival_train, Censored_train, 
                Survival_test = None, Censored_test = None, 
                K = 30, Method = "cumulative-time"):
        
        """
        Predict testing set using 'prototype' (i.e. training) set using KNN
        
        neighbor_idxs  - indices of nearest neighbors; (N_test, N_train)
        Survival_train - training sample time-to-event; (N,) np array
        Censored_train - training sample censorship status (1 = censored);
                         (N,) np array
        Survival_test  - optional testing time-to-event; when given, a
                         c-index is computed for the predictions
        Censored_test  - testing censorship status; required whenever
                         Survival_test is given
        K              - number of nearest-neighbours to use, int
        Method         - 'non-cumulative', 'cumulative-time' or
                         'cumulative-hazard'
        
        Returns (T_test, Ci):
        T_test - (N_test,) np array of predictions. For 'cumulative-hazard'
                 it is scored as a risk, otherwise as a survival time.
        Ci     - c-index of T_test against the testing outcomes, or 0 when
                 Survival_test is None.
        
        Raises ValueError for an unknown Method, or when Survival_test is
        given without Censored_test.
        """
        
        # Validate up front so bad arguments fail before any work is done.
        # An explicit raise is used because asserts vanish under "python -O".
        if Method not in ('non-cumulative', 'cumulative-time',
                          'cumulative-hazard'):
            raise ValueError("Method not implemented.")
        if Survival_test is not None and Censored_test is None:
            raise ValueError(
                "Censored_test is required when Survival_test is given.")
        
        # Keep only desired K
        neighbor_idxs = neighbor_idxs[:, 0:K]

        # Initialize        
        N_test = neighbor_idxs.shape[0]
        T_test = np.zeros([N_test])

        if Method == 'non-cumulative':
            
            # Convert outcomes to "alive status" at each time point;
            # indexed below as [sample, timepoint], negative = unknown
            alive_train = sUtils.getAliveStatus(Survival_train, Censored_train)
    
            # Get survival prediction for each patient            
            for idx in range(N_test):
                
                # Indexing with an index array yields a copy (assuming
                # alive_train is an ndarray), so zeroing the unknown
                # entries below does not modify alive_train itself.
                status = alive_train[neighbor_idxs[idx, :], :]
                totalKnown = np.sum(status >= 0, axis = 0)
                status[status < 0] = 0
                
                # remove timepoints where there are no known statuses
                # (i.e. after last neighbor dies or gets censored)
                has_known = totalKnown != 0
                status = status[:, has_known]
                totalKnown = totalKnown[has_known]
                
                # fraction of known neighbors alive at each timepoint,
                # summed over timepoints to give the overall prediction
                T_test[idx] = np.sum(np.sum(status, axis = 0) / totalKnown)
                
        else:
            
            # 'cumulative-time' or 'cumulative-hazard'
            for idx in range(N_test):
                
                # Get time and censorship of this patient's neighbors
                T = Survival_train[neighbor_idxs[idx, :]]
                C = Censored_train[neighbor_idxs[idx, :]]

                if C.min() == 1:
                    # All cases are censored, so no curve is estimated:
                    # fall back to the longest follow-up (time) or
                    # to zero (hazard)
                    if Method == "cumulative-time":
                        T_test[idx] = T.max()
                    else:
                        T_test[idx] = 0
                    continue
                    
                if Method == "cumulative-time":
                    # Get km estimator
                    t, f = self._km_estimator(T, C)
                else:
                    # Get NA estimator
                    t, f = self._na_estimator(T, C)
                
                # Area under the step curve: mean survival time for the
                # km curve, integral of the cum. hazard for the NA curve
                T_test[idx] = np.sum(np.diff(t) * f[0:-1])
        
        # Get c-index
        Ci = 0
        if Survival_test is not None:
            if Method == "cumulative-hazard":
                prediction_type = "risk"
            else:
                prediction_type = "survival_time"
            Ci = sUtils.c_index(T_test, Survival_test, Censored_test, 
                                prediction_type= prediction_type)
            
        return T_test, Ci
Esempio n. 2
0
    def predict(self,
                neighbor_idxs,
                Survival_train,
                Censored_train,
                Survival_test=None,
                Censored_test=None,
                K=15,
                Method='non-cumulative'):
        """
        Predict testing set using 'prototype' (i.e. training) set using KNN
        
        neighbor_idxs  - indices of nearest neighbors; (N_test, N_train)
        Survival_train - training sample time-to-event; (N,) np array
        Censored_train - training sample censorship status; (N,) np array
        Survival_test  - optional testing time-to-event; when given, a
                         c-index is computed for the predictions
        Censored_test  - testing censorship status; required whenever
                         Survival_test is given
        K              - number of nearest-neighbours to use, int
        Method         - cumulative vs non-cumulative probability

        Returns (T_test, CI):
        T_test - (N_test,) np array of predicted survival times
        CI     - c-index of T_test against the testing outcomes, or 0 when
                 Survival_test is None

        Raises ValueError for an unknown Method, or when Survival_test is
        given without Censored_test.
        """

        # Validate up front so bad arguments fail before any work is done.
        # An explicit raise is used because asserts vanish under "python -O".
        if Method not in ('non-cumulative', 'cumulative'):
            raise ValueError(
                "Method is either 'cumulative' or 'non-cumulative'.")
        if Survival_test is not None and Censored_test is None:
            raise ValueError(
                "Censored_test is required when Survival_test is given.")

        # Keep only desired K
        neighbor_idxs = neighbor_idxs[:, 0:K]

        # Initialize
        N_test = neighbor_idxs.shape[0]
        T_test = np.zeros([N_test])

        if Method == 'non-cumulative':

            # Convert outcomes to "alive status" at each time point;
            # indexed below as [sample, timepoint], negative = unknown
            alive_train = sUtils.getAliveStatus(Survival_train, Censored_train)

            # Get survival prediction for each patient
            for idx in range(N_test):

                # Indexing with an index array yields a copy (assuming
                # alive_train is an ndarray), so zeroing the unknown
                # entries below does not modify alive_train itself.
                status = alive_train[neighbor_idxs[idx, :], :]
                totalKnown = np.sum(status >= 0, axis=0)
                status[status < 0] = 0

                # remove timepoints where there are no known statuses
                has_known = totalKnown != 0
                status = status[:, has_known]
                totalKnown = totalKnown[has_known]

                # fraction of known neighbors alive at each timepoint,
                # summed over timepoints to give the overall prediction
                T_test[idx] = np.sum(np.sum(status, axis=0) / totalKnown)

        else:

            # Neighbourhood size actually retained: the slice above keeps
            # fewer than K columns when K exceeds the neighbours available,
            # so the requested K would overstate the at-risk counts.
            n_neighbors = neighbor_idxs.shape[1]

            for idx in range(N_test):

                # Get at-risk groups for each time point for nearest neighbors
                T = Survival_train[neighbor_idxs[idx, :]]
                O = 1 - Censored_train[neighbor_idxs[idx, :]]
                T, O, at_risk, _ = sUtils.calc_at_risk(T, O)

                # NOTE(review): assumes at_risk counts the neighbours no
                # longer at risk at each time point, as the subtraction
                # implies - confirm against sUtils.calc_at_risk
                N_at_risk = n_neighbors - at_risk

                # Calculate cumulative probability of survival
                P = np.cumprod((N_at_risk - O) / N_at_risk)

                # now get overall time prediction
                T_test[idx] = np.sum(P)

        # Get c-index
        #======================================================================
        CI = 0
        if Survival_test is not None:
            CI = sUtils.c_index(T_test,
                                Survival_test,
                                Censored_test,
                                prediction_type='survival_time')

        return T_test, CI
    if np.min(Data['Survival']) < 0:
        Data['Survival'] = Data['Survival'] - np.min(Data['Survival']) + 1

    Survival = np.int32(Data['Survival'])
    Censored = np.int32(Data['Censored'])
    #fnames = Data['Integ_Symbs']
    fnames = Data['Gene_Symbs']

    # remove zero-variance features
    fvars = np.std(data, 0)
    keep = fvars > 0
    data = data[:, keep]
    fnames = fnames[keep]

    # Generate survival status - discretized into months
    aliveStatus = sUtils.getAliveStatus(Survival, Censored, scale=30)

    #============================================================================
    # train a survival NCA model
    #==============================================================================

    ncaParams = {
        'LOADPATH':
        None,  #"/home/mohamed/Desktop/CooperLab_Research/KNN_Survival/Results/tmp/GBMLGG_Integ_ModelAttributes.txt",
        'RESULTPATH':
        "/home/mohamed/Desktop/CooperLab_Research/KNN_Survival/Results/tmp/",
        'description': "GBMLGG_Gene_",
        'SIGMA': 1,
        'LAMBDA': 0,
        'LEARN_RATE': 0.01,
        'MONITOR_STEP': 1,