def __init__(self, label_type, ensemble=50, scores='percentages',
             parent=None, features=None,
             fastr_plugin='LinearExecution',
             name='Example'):
    """
    Initialize object.

    Parameters
    ----------
    parent: WORC network, default None
        If you input a WORC network, the evaluate network is added
        to the existing network.

    """
    if parent is not None:
        self.parent = parent
        self.network = parent.network
        self.mode = 'WORC'
        self.name = parent.network.id
        self.ensemble = parent.configs[0]['Ensemble']['Use']
    else:
        self.mode = 'StandAlone'
        self.fastr_plugin = fastr_plugin
        self.name = 'WORC_Evaluate_' + name
        self.network = fastr.create_network(id=self.name)
        self.fastr_tmpdir = os.path.join(fastr.config.mounts['tmp'], self.name)
        self.ensemble = ensemble

    if features is None and self.mode == 'StandAlone':
        raise WORCexceptions.WORCIOError(
            'Either features as input or a WORC network is required for '
            'the Evaluate network.')

    self.features = features
    self.label_type = label_type

    self.create_network()
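
# Hedged usage sketch (not from the WORC source): how the two construction
# modes above could be invoked, assuming the enclosing class is WORC's
# Evaluate network builder. The names `worc` and `my_features` are
# hypothetical placeholders.
#
#   standalone = Evaluate(label_type='label1', features=my_features,
#                         name='experiment1')       # StandAlone mode
#   attached = Evaluate(label_type='label1', parent=worc)  # added to worc.network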
def load_labels(label_file, label_type):
    """Load the label data from a label file.

    Args:
        label_file (string): The path to the label file.
        label_type (list): List of the names of the labels to load.

    Returns:
        dict: A dict containing 'patient_IDs', 'label' and 'label_name'.

    """
    if not os.path.exists(label_file):
        raise ae.WORCKeyError(f'File {label_file} does not exist!')

    _, extension = os.path.splitext(label_file)
    if extension == '.txt':
        label_names, patient_IDs, label_status = load_label_txt(label_file)
    elif extension == '.csv':
        label_names, patient_IDs, label_status = load_label_csv(label_file)
    elif extension == '.ini':
        label_names, patient_IDs, label_status = load_label_XNAT(label_file)
    else:
        raise ae.WORCIOError(extension + ' is not a valid label file extension.')

    print("Label names to extract: " + str(label_type))
    labels = list()
    for i_label in label_type:
        label_index = np.where(label_names == i_label)[0]
        if label_index.size == 0:
            raise ae.WORCValueError('Could not find label: ' + str(i_label))
        else:
            labels.append(label_status[:, label_index])

    label_data = dict()
    label_data['patient_IDs'] = patient_IDs
    label_data['label'] = labels
    label_data['label_name'] = label_type

    return label_data
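
# A minimal sketch of the name-to-column lookup performed in load_labels,
# using toy arrays instead of a real label file; the names and values below
# are made up for illustration.
def _label_lookup_example():
    import numpy as np

    label_names = np.asarray(['label1', 'label2'])
    # Rows are patients, columns correspond to label_names
    label_status = np.asarray([[1, 0],
                               [0, 1],
                               [1, 1]])
    requested = ['label2']
    # Same np.where-based selection as load_labels
    labels = [label_status[:, np.where(label_names == name)[0]]
              for name in requested]
    return labels[0].ravel().tolist()  # -> [0, 1, 1]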
def createfixedsplits(label_file=None, label_type=None, patient_IDs=None,
                      test_size=0.2, N_iterations=1, regression=False,
                      stratify=None, modus='singlelabel', output=None):
    '''Create fixed splits for a random-split cross-validation.'''
    # Check whether input is valid
    if patient_IDs is None:
        if label_file is not None and label_type is not None:
            # Read the label file
            label_data = load_labels(label_file, label_type)
            patient_IDs = label_data['patient_IDs']

            # Create the stratification object
            if modus == 'singlelabel':
                stratify = label_data['label']
            elif modus == 'multilabel':
                # Create a stratification object from the labels
                # Label = 0 means no label equals one
                # Other label numbers refer to the label name that is 1
                stratify = list()
                labels = label_data['label']
                for pnum in range(0, len(labels[0])):
                    plabel = 0
                    for lnum, slabel in enumerate(labels):
                        if slabel[pnum] == 1:
                            plabel = lnum + 1
                    stratify.append(plabel)
            else:
                raise ae.WORCKeyError(f'{modus} is not a valid modus!')
        else:
            raise ae.WORCIOError('Either a label file and label type or ' +
                                 'patient_IDs need to be provided!')

    pd_dict = dict()
    for i in range(N_iterations):
        print(f'Splitting iteration {i + 1} / {N_iterations}')
        # Create a random seed for the splitting
        random_seed = np.random.randint(5000)

        # Define stratification
        unique_patient_IDs, unique_indices =\
            np.unique(np.asarray(patient_IDs), return_index=True)
        if regression:
            unique_stratify = None
        else:
            unique_stratify = [stratify[i] for i in unique_indices]

        # Split, throw error when dataset is too small for split ratios
        try:
            unique_PID_train, indices_PID_test =\
                train_test_split(unique_patient_IDs,
                                 test_size=test_size,
                                 random_state=random_seed,
                                 stratify=unique_stratify)
        except ValueError as e:
            e = str(e) + ' Increase the size of your test set.'
            raise ae.WORCValueError(e)

        # Check for all IDs if they are in test or training
        indices_train = list()
        indices_test = list()
        patient_ID_train = list()
        patient_ID_test = list()
        for num, pid in enumerate(patient_IDs):
            if pid in unique_PID_train:
                indices_train.append(num)

                # Make sure we get a unique ID
                if pid in patient_ID_train:
                    n = 1
                    while str(pid + '_' + str(n)) in patient_ID_train:
                        n += 1
                    pid = str(pid + '_' + str(n))
                patient_ID_train.append(pid)
            else:
                indices_test.append(num)

                # Make sure we get a unique ID
                if pid in patient_ID_test:
                    n = 1
                    while str(pid + '_' + str(n)) in patient_ID_test:
                        n += 1
                    pid = str(pid + '_' + str(n))
                patient_ID_test.append(pid)

        # Add to train object
        pd_dict[str(i) + '_train'] = patient_ID_train

        # Test object has to be same length as training object
        extras = [""] * (len(patient_ID_train) - len(patient_ID_test))
        patient_ID_test.extend(extras)
        pd_dict[str(i) + '_test'] = patient_ID_test

    # Convert into pandas dataframe for easy use and conversion
    df = pd.DataFrame(pd_dict)

    # Write output if required
    if output is not None:
        print("Writing output.")
        df.to_csv(output)

    return df
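
# A minimal sketch of the multilabel stratification encoding used above:
# 0 means no label equals one for that patient, and k means label k-1 is 1.
# The toy label arrays below are made up for illustration.
def _multilabel_stratify_example():
    import numpy as np

    # Two labels, three patients (same layout as label_data['label'])
    labels = [np.asarray([1, 0, 0]), np.asarray([0, 1, 0])]
    stratify = list()
    for pnum in range(0, len(labels[0])):
        plabel = 0
        for lnum, slabel in enumerate(labels):
            if slabel[pnum] == 1:
                plabel = lnum + 1
        stratify.append(plabel)
    return stratify  # -> [1, 2, 0]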
def findlabeldata(patientinfo, label_type, filenames,
                  image_features_temp=None):
    """
    Load the label data and match it to the image features.

    Args:
        patientinfo (string): file with patient label data
        label_type (string): name of the label read out from patientinfo
        filenames (list): names of the patient feature files, used for matching
        image_features_temp (np.array or list): array of the features

    Returns:
        label_data (dict): contains patient ids, their labels and the label name
    """
    # Get the labels and patient IDs
    label_data_temp = load_labels(patientinfo, label_type)
    label_data = dict()
    patient_IDs = list()
    label_value = list()
    for i_len in range(len(label_data_temp['label_name'])):
        label_value.append(list())

    # Check per feature file if there is a match in the label data
    image_features = list()
    for i_feat, feat in enumerate(filenames):
        ifound = 0
        matches = list()
        for i_num, i_patient in enumerate(label_data_temp['patient_IDs']):
            if i_patient in str(feat):
                # Match: add the patient ID to the IDs and to the matches
                patient_IDs.append(i_patient)
                matches.append(i_patient)

                # If there are feature files given, add it to the list
                if image_features_temp is not None:
                    image_features.append(image_features_temp[i_feat])

                # For each label that we have, add the value to the label list
                for i_len in range(len(label_data_temp['label_name'])):
                    label_value[i_len].append(label_data_temp['label'][i_len][i_num])

                # Count how many matches we found for this (feature) file:
                # should be exactly one
                ifound += 1

        if ifound > 1:
            message = ('Multiple matches ({}) found in labeling '
                       'for feature file {}.').format(str(matches), str(feat))
            raise ae.WORCIOError(message)
        elif ifound == 0:
            message = ('No entry found in labeling '
                       'for feature file {}.').format(str(feat))
            raise ae.WORCIOError(message)

    # if image_features_temp is not None:
    #     image_features = np.asarray(image_features)

    # Convert to arrays
    for i_len in range(len(label_value)):
        label_value[i_len] = np.asarray(label_value[i_len])

    label_data['patient_IDs'] = np.asarray(patient_IDs)
    label_data['label'] = np.asarray(label_value)
    label_data['label_name'] = label_data_temp['label_name']

    return label_data, image_features
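
# A minimal sketch of the substring matching used above: a patient matches a
# feature file when its ID occurs in the file name. The path below is a
# hypothetical example.
def _filename_match_example():
    patient_IDs = ['patient_001', 'patient_002']
    feat = '/tmp/features_patient_002.hdf5'
    return [pid for pid in patient_IDs if pid in str(feat)]
    # -> ['patient_002']; zero or multiple matches would raise in findlabeldata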
def plot_hyperparameters(prediction, label_type=None, estsize=50,
                         output=None, removeconstants=False,
                         verbose=False):
    """Gather which hyperparameters have been used in the best workflows.

    Parameters
    ----------
    prediction: pandas dataframe or string, mandatory
        output of trainclassifier function, either a pandas dataframe
        or a HDF5 file

    estsize: integer, default 50
        Number of estimators that should be taken into account.

    output: filename of csv, default None
        Output file to write to. If None, no output is written, but the
        result is just returned as a variable.

    removeconstants: boolean, default False
        Determine whether to remove any hyperparameters which have the
        same value in all workflows.

    verbose: boolean, default False
        Whether to show print messages or not.

    """
    # Load the prediction file
    if type(prediction) is not pd.core.frame.DataFrame:
        if os.path.isfile(prediction):
            prediction = pd.read_hdf(prediction)
        else:
            raise ae.WORCIOError(f'{prediction} is not an existing file!')

    # Select the estimator from the pandas dataframe to use
    keys = prediction.keys()
    if label_type is None:
        label_type = keys[0]
    prediction = prediction[label_type]

    # Loop over classifiers
    total = len(prediction.classifiers)
    for cnum, cls in enumerate(prediction.classifiers):
        if verbose:
            print(f'Extracting hyperparameters for iteration {cnum + 1} / {total}.')

        # Get parameters and select only a set number
        parameters = cls.cv_results_['params']
        if len(parameters) > estsize:
            parameters = parameters[0:estsize]

        # Additional information besides the parameters
        for i in range(0, len(parameters)):
            # Add which (cross-validation) iteration is used and the rank
            parameters[i]['Iteration'] = cnum + 1
            parameters[i]['Rank'] = i + 1

            # Add some statistics
            parameters[i]['Metric'] = cls.scoring
            parameters[i]['mean_train_score'] =\
                cls.cv_results_['mean_train_score'][i]
            parameters[i]['mean_fit_time'] =\
                cls.cv_results_['mean_fit_time'][i]
            parameters[i]['std_train_score'] =\
                cls.cv_results_['std_train_score'][i]
            parameters[i]['generalization_score'] =\
                cls.cv_results_['generalization_score'][i]
            parameters[i]['rank_generalization_score'] =\
                cls.cv_results_['rank_generalization_score'][i]

            # NOTE: while this is called test score, it is the score on the
            # validation dataset(s)
            parameters[i]['mean_validation_score'] =\
                cls.cv_results_['mean_test_score'][i]
            parameters[i]['std_validation_score'] =\
                cls.cv_results_['std_test_score'][i]

            # Initialize data object if this is the first iteration
            if cnum == 0:
                data = {k: list() for k in parameters[i]}

        # Add to general data object
        for p in parameters:
            for k in p.keys():
                data[k].append(p[k])

    # Optionally, remove any hyperparameters which have the same
    # value in all workflows.
    n_parameters = len(list(data.keys()))
    if removeconstants:
        if verbose:
            print('Removing parameters with constant values.')

        keys = list(data.keys())
        for k in keys:
            # First convert all values to strings so we can use set
            tempdata = [str(i) for i in data[k]]

            # Count unique values, and if only one, delete
            n_unique = len(list(set(tempdata)))
            if n_unique == 1:
                if verbose:
                    print(f'\t Removing parameter {k}.')
                del data[k]

    # Write to csv if output name is provided
    if output is not None:
        if verbose:
            print(f'Writing output to {output}.')

        # First, specify order of columns for easy reading
        columns = list(data.keys())
        starters = ['Iteration', 'Rank', 'Metric', 'mean_validation_score',
                    'mean_train_score', 'mean_fit_time']
        for key in starters:
            columns.remove(key)

        columns = starters + columns

        # Write to dataframe
        df = pd.DataFrame(data)
        df.to_csv(output, index=False, columns=columns)

    # Display some information
    if verbose:
        print(f'Number of hyperparameters: {n_parameters}.')
        if removeconstants:
            n_parameters_unique = len(list(data.keys()))
            print(f'Number of hyperparameters with unique values: {n_parameters_unique}.')

    return data
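
# A minimal sketch of the constant-hyperparameter removal above: values are
# stringified so that unhashable entries can still be counted with set().
# The toy data below is made up for illustration.
def _drop_constant_columns_example():
    data = {'max_depth': [5, 5, 5], 'C': [0.1, 1.0, 10.0]}
    for k in list(data.keys()):
        if len(set(str(v) for v in data[k])) == 1:
            del data[k]
    return data  # -> {'C': [0.1, 1.0, 10.0]}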
def RankSVM_train_old(train_data, train_target, cost=1, lambda_tol=1e-6,
                      norm_tol=1e-4, max_iter=500, svm='Poly', gamma=0.05,
                      coefficient=0.05, degree=3):
    # NOTE: Only multilabel classification, not multiclass! Make a check.
    '''
    Weights, Bias, SVs = RankSVM_train(train_data, train_target, cost,
                                       lambda_tol, norm_tol, max_iter,
                                       svm, gamma, coefficient, degree)

    Description

    RankSVM_train takes,
        train_data   - An MxN array, where the ith training instance is
                       stored in train_data[i, :]
        train_target - A QxM array; if the ith training instance belongs to
                       the jth class, then train_target[j, i] equals +1,
                       otherwise train_target[j, i] equals -1
        svm          - svm gives the type of svm used in training, which can
                       take the value of 'RBF', 'Poly' or 'Linear'; the
                       corresponding parameters are:
                       1) if svm is 'RBF', gamma gives the value of gamma,
                          where the kernel is exp(-gamma*|x[i]-x[j]|^2)
                       2) if svm is 'Poly', three values are used: gamma,
                          coefficient and degree, where the kernel is
                          (gamma*<x[i],x[j]>+coefficient)^degree
                       3) if svm is 'Linear', no parameters are used
        cost         - The value of 'C' used in the SVM, default=1
        lambda_tol   - The tolerance value for lambda described in the
                       appendix of [1]; default value is 1e-6
        norm_tol     - The tolerance value for the difference between
                       alpha(p+1) and alpha(p) described in the appendix
                       of [1]; default value is 1e-4
        max_iter     - The maximum number of iterations for RankSVM,
                       default=500

    and returns,
        Weights      - The value for beta[k, i] as described in the appendix
                       of [1] is stored in Weights[k, i]
        Bias         - The value for b[i] as described in the appendix of [1]
                       is stored in Bias[1, i]
        SVs          - The ith support vector is stored in SVs[:, i]

    For more details, please refer to [1] and [2].
    '''
    # RankedSVM only works for multilabel problems, not multiclass, so check
    # whether patients have no class or multiple classes
    n_class = train_target.shape[0]
    n_object = train_target.shape[1]
    for i in range(0, n_object):
        if np.sum(train_target[:, i]) != -n_class + 2:
            raise WORCexceptions.WORCIOError('RankedSVM only works ' +
                                             'for multilabel problems, ' +
                                             'not multiclass. One or ' +
                                             'more objects belong ' +
                                             'either to no class or ' +
                                             'multiple classes. ' +
                                             'Please check your data ' +
                                             'again.')

    num_training, tempvalue = np.shape(train_data)
    SVs = np.zeros(shape=(tempvalue, num_training))
    num_class, tempvalue = np.shape(train_target)
    lc = np.ones(shape=(1, num_class))
    target = np.zeros(shape=(num_class, tempvalue))
    for i in range(num_training):
        temp = train_target[:, int(i)]
        if np.logical_and(np.sum(temp) != num_class,
                          np.sum(temp) != -num_class):
            # SVs = (SVs, train_data[int(i), :].conj().T)
            SVs[:, i] = train_data[int(i), :].conj().T
            target[:, i] = temp

    Dim, num_training = np.shape(SVs)
    Label = np.array(np.zeros(shape=(num_training, 1)), dtype=float)
    not_Label = []
    Label_size = np.zeros(shape=(1, num_training))
    size_alpha = np.zeros(shape=(1, num_training), dtype=float)

    for i in range(num_training):
        temp1 = train_target[:, int(i)]
        Label_size[0, int(i)] = np.sum(temp1 == lc)
        lds = num_class - Label_size[0, int(i)]
        size_alpha[0, int(i)] = np.dot(lds, Label_size[0, int(i)])
        for j in range(num_class):
            if temp1[int(j)] == 1:
                Label[int(i), 0] = np.array([j])
            else:
                not_Label.append((j))

    not_Label = np.reshape(not_Label, (num_training, num_class - 1))

    kernel = np.zeros(shape=(num_training, num_training), dtype=float)
    if svm == 'RBF':
        for i in range(num_training):
            for j in range(num_training):
                kernel[int(i), int(j)] =\
                    np.exp(-gamma * (np.sum((SVs[:, i] - SVs[:, j])**2)))
    else:
        if svm == 'Poly':
            for i in range(num_training):
                for j in range(num_training):
                    # kernel[i, j] = (gamma * <SVs[:, i], SVs[:, j]> + coefficient)**degree
                    ab = np.dot((np.array([SVs[:, int(j)]])),
                                ((np.array([SVs[:, int(i)]])).conj().T))
                    ab = gamma * ab
                    ab = ab + coefficient
                    ab = ab**degree
                    kernel[int(i), int(j)] = np.array([ab])
        else:
            for i in range(num_training):
                for j in range(num_training):
                    kernel[int(i), int(j)] =\
                        np.dot((np.array([SVs[:, int(j)]])),
                               ((np.array([SVs[:, int(i)]])).conj().T))

    svm_used = svm

    # Begin training phase: data initializing
    ak = np.sum(size_alpha, dtype=int)
    Alpha = np.zeros(shape=(1, ak))

    # Create a cell-like object c_value
    c_value = np.zeros((num_class,), dtype=object)
    for i in range(num_class):
        c_value[i] = np.zeros(shape=(num_class, num_class))

    for i in range(num_class):
        ak = c_value[i]
        ak[i, :] = np.ones(shape=(1, num_class))
        ak[:, i] = -np.ones(shape=(num_class,))
        c_value[i] = ak

    # Find the Alpha value using the Franke and Wolfe method [1]
    continuing = True
    iteration = 0
    while continuing:
        # Computing Beta
        Beta = np.zeros(shape=(num_class, num_training))
        for k in range(num_class):
            for i in range(num_training):
                for m in range(int(Label_size[0, int(i)])):
                    for n in range(num_class - int(Label_size[0, int(i)])):
                        # index = np.sum(size_alpha[:, 0:i]) +\
                        #     (m - 1) * (num_class - Label_size[i]) + n
                        index = np.sum(size_alpha[:, 0:i]) + n
                        ak = np.array(c_value[k], dtype=int)
                        r1 = Label[int(i)]
                        # NOTE: this supports only multiclass; for
                        # multilabel, try: r1 = Label[i]
                        r1 = r1[m]
                        c1 = not_Label[int(i)]
                        c1 = c1[n]
                        Beta[k, i] = Beta[k, i] +\
                            ak[int(r1), int(c1)] * Alpha[:, int(index)]

        # Computing gradient(ikl)
        inner = np.zeros(shape=(num_class, num_training))
        for k in range(num_class):
            for j in range(num_training):
                inner[k, j] = np.dot(Beta[k, :], kernel[:, j])

        gradient = []
        for i in range(num_training):
            for m in range(int(Label_size[0, int(i)])):
                for n in range(num_class - int(Label_size[0, int(i)])):
                    r1 = Label[int(i)]
                    # NOTE: this supports only multiclass; for
                    # multilabel, try: r1 = Label[i]
                    r1 = r1[m]
                    c1 = not_Label[int(i)]
                    c1 = c1[n]
                    temp = inner[int(r1), int(i)] - inner[int(c1), int(i)] - 1
                    gradient.append(float(temp))

        gradient = np.array(gradient, dtype=float)
        gradient = gradient.conj().T

        # Find Alpha_new
        Aeq = np.zeros(shape=(num_class, np.sum(size_alpha, dtype=int)))
        for k in range(num_class):
            counter = 0
            for i in range(num_training):
                for m in range(int(Label_size[0, int(i)])):
                    for n in range(num_class - int(Label_size[0, int(i)])):
                        r1 = Label[int(i)]
                        # NOTE: this supports only multiclass; for
                        # multilabel, try: r1 = Label[i]
                        r1 = r1[m]
                        c1 = not_Label[int(i)]
                        c1 = c1[n]
                        ak = c_value[k]
                        Aeq[k, counter] = ak[int(r1), int(c1)]
                        counter += 1

        beq = np.zeros(shape=(num_class,))
        LB = np.zeros(shape=(np.sum(size_alpha, dtype=int), 1))
        UB = np.zeros(shape=(np.sum(size_alpha, dtype=int), 1))
        counter = 0
        for i in range(num_training):
            for m in range(int(Label_size[0, int(i)])):
                for n in range(num_class - int(Label_size[0, int(i)])):
                    UB[counter, :] = cost / (size_alpha[:, i])
                    counter += 1

        cc = [LB.T, UB.T]
        cc = np.ravel(cc)
        bounds = np.reshape(cc, (2, np.sum(size_alpha, dtype=int)))
        bounds = bounds.T
        Alpha_new = linprog(gradient.conj().T, A_ub=None, b_ub=None,
                            A_eq=Aeq, b_eq=beq.T, bounds=bounds)
        Alpha_new = Alpha_new.x
        Alpha_new = (np.array(Alpha_new)).conj().T
        Lambda = fminbound(neg_dual_func, 0.0, 1.0,
                           args=(Alpha, Alpha_new, c_value, kernel,
                                 num_training, num_class, Label, not_Label,
                                 Label_size, size_alpha))

        # Test convergence
        if np.logical_or(np.abs(Lambda) <= lambda_tol,
                         np.dot(Lambda, np.sqrt(np.sum(((Alpha_new - Alpha)**2.)))) <= norm_tol):
            continuing = False
        else:
            if iteration >= max_iter:
                continuing = False
            else:
                Alpha = Alpha + np.dot(Lambda, Alpha_new - Alpha)
                iteration += 1

    Weights = Beta

    # Computing Bias
    Left = []
    Right = []
    for i in range(num_training):
        for m in range(int(Label_size[0, int(i)])):
            for n in range(num_class - int(Label_size[0, int(i)])):
                index = np.sum(size_alpha[:, 0:i]) + n
                if np.logical_and(np.abs(Alpha[:, int(index)]) >= lambda_tol,
                                  np.abs(Alpha[:, int(index)] - cost / (size_alpha[:, i])) >= lambda_tol):
                    vector = np.zeros(shape=(1, num_class))
                    vector[0, int(Label[i])] = 1
                    c1 = not_Label[int(i)]
                    c1 = c1[n]
                    vector[0, int(c1)] = -1.
                    Left.append(vector)
                    Right.append(-gradient[int(index)])

    if is_empty(Left):
        Bias = np.sum(train_target.conj().T)
    else:
        bb = np.array([Right])
        ss1, ss2 = bb.shape
        aa = np.ravel(Left)
        aa = np.reshape(aa, (ss2, num_class))
        # Proper way to solve a linear system with a non-square matrix
        Bias = np.linalg.lstsq(aa, bb.T, rcond=-1)[0]
        # Bias = Bias.T

    return Weights, Bias, SVs
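
# A minimal sketch of the validity check at the top of RankSVM_train_old: a
# +1/-1 target column sums to -n_class + 2 exactly when one class is +1
# (1 - (n_class - 1) = -n_class + 2). The toy columns below are made up.
def _ranksvm_target_check_example():
    import numpy as np

    n_class = 3
    valid = np.asarray([1, -1, -1])    # exactly one class assigned
    invalid = np.asarray([1, 1, -1])   # two classes assigned
    return (np.sum(valid) == -n_class + 2,
            np.sum(invalid) == -n_class + 2)  # -> (True, False)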
def random_split_cross_validation(image_features, feature_labels, classes,
                                  patient_ids, n_iterations, param_grid,
                                  config, modus, test_size, start=0,
                                  save_data=None, tempsave=False,
                                  tempfolder=None, fixedsplits=None,
                                  fixed_seed=False, use_fastr=None,
                                  fastr_plugin=None):
    """Cross-validation in which data is randomly split in each iteration.

    Due to options of doing single-label and multi-label classification,
    stratified splitting, and regression, we use a manual loop instead of
    the default scikit-learn objects.

    Parameters
    ------------

    Returns
    ------------

    """
    print('Starting random-split cross-validation.')
    logging.debug('Starting random-split cross-validation.')
    if save_data is None:
        # Start from zero, thus empty list of previous data
        save_data = list()

    for i in range(start, n_iterations):
        print(('Cross-validation iteration {} / {} .').format(str(i + 1), str(n_iterations)))
        logging.debug(('Cross-validation iteration {} / {} .').format(str(i + 1), str(n_iterations)))
        timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
        print(f'\t Time: {timestamp}.')
        logging.debug(f'\t Time: {timestamp}.')
        if fixed_seed:
            random_seed = i**2
        else:
            random_seed = np.random.randint(5000)

        t = time.time()

        # Split into test and training set, where the percentage of each
        # label is maintained
        if any(clf in regressors for clf in param_grid['classifiers']):
            # We cannot do a stratified shuffle split with regression
            stratify = None
        else:
            if modus == 'singlelabel':
                classes_temp = stratify = classes.ravel()
            elif modus == 'multilabel':
                # Create a stratification object from the labels
                # Label = 0 means no label equals one
                # Other label numbers refer to the label name that is 1
                stratify = list()
                for pnum in range(0, len(classes[0])):
                    plabel = 0
                    for lnum, slabel in enumerate(classes):
                        if slabel[pnum] == 1:
                            plabel = lnum + 1
                    stratify.append(plabel)

                # Sklearn multiclass requires rows to be objects/patients
                classes_temp = np.zeros((classes.shape[1], classes.shape[0]))
                for n_patient in range(0, classes.shape[1]):
                    for n_label in range(0, classes.shape[0]):
                        classes_temp[n_patient, n_label] = classes[n_label, n_patient]
            else:
                raise ae.WORCKeyError(f'{modus} is not a valid modus!')

        if fixedsplits is None:
            # Use random split. Split per patient, not per sample
            unique_patient_ids, unique_indices =\
                np.unique(np.asarray(patient_ids), return_index=True)
            if any(clf in regressors for clf in param_grid['classifiers']):
                unique_stratify = None
            else:
                unique_stratify = [stratify[i] for i in unique_indices]

            try:
                unique_PID_train, indices_PID_test =\
                    train_test_split(unique_patient_ids,
                                     test_size=test_size,
                                     random_state=random_seed,
                                     stratify=unique_stratify)
            except ValueError as e:
                e = str(e) + ' Increase the size of your validation set.'
                raise ae.WORCValueError(e)

            # Check for all ids if they are in test or training
            indices_train = list()
            indices_test = list()
            patient_ID_train = list()
            patient_ID_test = list()
            for num, pid in enumerate(patient_ids):
                if pid in unique_PID_train:
                    indices_train.append(num)

                    # Make sure we get a unique ID
                    if pid in patient_ID_train:
                        n = 1
                        while str(pid + '_' + str(n)) in patient_ID_train:
                            n += 1
                        pid = str(pid + '_' + str(n))
                    patient_ID_train.append(pid)
                else:
                    indices_test.append(num)

                    # Make sure we get a unique ID
                    if pid in patient_ID_test:
                        n = 1
                        while str(pid + '_' + str(n)) in patient_ID_test:
                            n += 1
                        pid = str(pid + '_' + str(n))
                    patient_ID_test.append(pid)

            # Split features and labels accordingly
            X_train = [image_features[i] for i in indices_train]
            X_test = [image_features[i] for i in indices_test]
            if modus == 'singlelabel':
                Y_train = classes_temp[indices_train]
                Y_test = classes_temp[indices_test]
            elif modus == 'multilabel':
                Y_train = classes_temp[indices_train, :]
                Y_test = classes_temp[indices_test, :]
            else:
                raise ae.WORCKeyError(f'{modus} is not a valid modus!')

        else:
            # Use pre-defined splits
            train = fixedsplits[str(i) + '_train'].values
            test = fixedsplits[str(i) + '_test'].values

            # Convert the patient IDs to the correct indices
            ind_train = list()
            for j in train:
                success = False
                for num, p in enumerate(patient_ids):
                    if j == p:
                        ind_train.append(num)
                        success = True
                if not success:
                    raise ae.WORCIOError("Patient " + str(j).zfill(3) + " is not included!")

            ind_test = list()
            for j in test:
                success = False
                for num, p in enumerate(patient_ids):
                    if j == p:
                        ind_test.append(num)
                        success = True
                if not success:
                    raise ae.WORCIOError("Patient " + str(j).zfill(3) + " is not included!")

            X_train = [image_features[i] for i in ind_train]
            X_test = [image_features[i] for i in ind_test]

            patient_ID_train = patient_ids[ind_train]
            patient_ID_test = patient_ids[ind_test]

            if modus == 'singlelabel':
                Y_train = classes_temp[ind_train]
                Y_test = classes_temp[ind_test]
            elif modus == 'multilabel':
                Y_train = classes_temp[ind_train, :]
                Y_test = classes_temp[ind_test, :]
            else:
                raise ae.WORCKeyError(f'{modus} is not a valid modus!')

        # Find best hyperparameters and construct classifier
        config['HyperOptimization']['use_fastr'] = use_fastr
        config['HyperOptimization']['fastr_plugin'] = fastr_plugin
        n_cores = config['General']['Joblib_ncores']
        trained_classifier = random_search_parameters(features=X_train,
                                                      labels=Y_train,
                                                      param_grid=param_grid,
                                                      n_cores=n_cores,
                                                      random_seed=random_seed,
                                                      **config['HyperOptimization'])

        # We only want to save the feature values and one label array
        X_train = [x[0] for x in X_train]
        X_test = [x[0] for x in X_test]

        temp_save_data = (trained_classifier, X_train, X_test, Y_train,
                          Y_test, patient_ID_train, patient_ID_test,
                          random_seed)

        save_data.append(temp_save_data)

        # Create a temporary save
        if tempsave:
            panda_labels = ['trained_classifier', 'X_train', 'X_test',
                            'Y_train', 'Y_test', 'config',
                            'patient_ID_train', 'patient_ID_test',
                            'random_seed', 'feature_labels']

            panda_data_temp =\
                pd.Series([trained_classifier, X_train, X_test, Y_train,
                           Y_test, config, patient_ID_train,
                           patient_ID_test, random_seed, feature_labels],
                          index=panda_labels,
                          name='Constructed crossvalidation')

            panda_data = pd.DataFrame(panda_data_temp)
            n = 0
            filename = os.path.join(tempfolder, 'tempsave_' + str(i) + '.hdf5')
            while os.path.exists(filename):
                n += 1
                filename = os.path.join(tempfolder, 'tempsave_' + str(i + n) + '.hdf5')

            panda_data.to_hdf(filename, 'EstimatorData')
            del panda_data, panda_data_temp

        # Print elapsed time
        elapsed = int((time.time() - t) / 60.0)
        print(f'\t Fitting took {elapsed} minutes.')
        logging.debug(f'\t Fitting took {elapsed} minutes.')

    return save_data
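
# A minimal sketch of the ID-deduplication used above: when a patient occurs
# more than once (multiple samples), suffixes _1, _2, ... keep the stored
# IDs unique. The toy IDs below are made up.
def _unique_pid_example():
    seen = list()
    for pid in ['p1', 'p1', 'p1']:
        if pid in seen:
            n = 1
            while str(pid + '_' + str(n)) in seen:
                n += 1
            pid = str(pid + '_' + str(n))
        seen.append(pid)
    return seen  # -> ['p1', 'p1_1', 'p1_2']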
def crossval(config, label_data, image_features, param_grid=None,
             use_fastr=False, fastr_plugin=None, tempsave=False,
             fixedsplits=None, ensemble={'Use': False}, outputfolder=None,
             modus='singlelabel'):
    """
    Construct multiple individual classifiers based on the label settings.

    Parameters
    ----------
    config: dict, mandatory
        Dictionary with config settings. See the Github Wiki for the
        available fields and formatting.

    label_data: dict, mandatory
        Should contain the following:
        patient_IDs (list): IDs of the patients, used to keep track of
        test and training sets, and label data
        label (list): List of lists, where each list contains the label
        status for that patient for each label
        label_name (list): Contains the different names that are stored
        in the label object

    image_features: numpy array, mandatory
        Consists of a tuple of two lists for each patient:
        (feature_values, feature_labels)

    param_grid: dictionary, optional
        Contains the parameters and their values which are used in the
        grid or randomized search hyperparameter optimization. See the
        construct_classifier function for some examples.

    use_fastr: boolean, default False
        If False, parallel execution through Joblib is used for fast
        execution of the hyperparameter optimization. Especially suited
        for execution on multicore (H)PCs. The settings used are
        specified in the config.ini file in the IOparser folder, which
        you can adjust to your system. If True, fastr is used to split
        the hyperparameter optimization into separate jobs. Parameters
        for the splitting can be specified in the config file.
        Especially suited for clusters.

    fastr_plugin: string, default None
        Determines which plugin is used for fastr executions. When None,
        uses the default plugin from the fastr config.

    tempsave: boolean, default False
        If True, create a .hdf5 file after each cross-validation
        containing the classifier and results from that split. This is
        written to the GSOut folder in your fastr output mount. If
        False, only the result of all combined cross-validations will
        be saved to a .hdf5 file. This will also be done if set to True.

    fixedsplits: string, optional
        By default, random-split cross-validation is used to train and
        evaluate the machine learning methods. Optionally, you can
        provide a .xlsx file containing fixed splits to be used. See
        the Github Wiki for the format.

    ensemble: dictionary, optional
        Contains the configuration for constructing an ensemble.

    modus: string, default 'singlelabel'
        Determine whether one-vs-all classification (or regression) for
        each single label is used ('singlelabel') or if multilabel
        classification is performed ('multilabel').

    Returns
    ----------
    panda_data: pandas dataframe
        Contains all information on the trained classifier.

    """
    if tempsave:
        import fastr

    # Define all possible regressors
    regressors = ['SVR', 'RFR', 'SGDR', 'Lasso', 'ElasticNet']

    # Process input data
    patient_IDs = label_data['patient_IDs']
    label_value = label_data['label']
    label_name = label_data['label_name']

    if outputfolder is None:
        logfilename = os.path.join(os.getcwd(), 'classifier.log')
    else:
        logfilename = os.path.join(outputfolder, 'classifier.log')
    print("Logging to file " + str(logfilename))

    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)

    logging.basicConfig(filename=logfilename, level=logging.DEBUG)
    N_iterations = config['CrossValidation']['N_iterations']
    test_size = config['CrossValidation']['test_size']

    classifier_labelss = dict()
    logging.debug('Starting classifier')

    # We only need one label instance, assuming they are all the same
    feature_labels = image_features[0][1]

    # Check if we need to use fixedsplits:
    if fixedsplits is not None and '.xlsx' in fixedsplits:
        # fixedsplits = '/home/mstarmans/Settings/RandomSufflingOfData.xlsx'
        wb = xlrd.open_workbook(fixedsplits)
        wb = wb.sheet_by_index(1)

    if modus == 'singlelabel':
        print('Performing single-label classification.')
        logging.debug('Performing single-label classification.')
    elif modus == 'multilabel':
        print('Performing multi-label classification.')
        logging.debug('Performing multi-label classification.')
        label_value = [label_value]
        label_name = [label_name]
    else:
        m = ('{} is not a valid modus!').format(modus)
        logging.debug(m)
        raise ae.WORCKeyError(m)

    for i_class, i_name in zip(label_value, label_name):
        if modus == 'singlelabel':
            i_class_temp = i_class.ravel()

        save_data = list()

        for i in range(0, N_iterations):
            print(('Cross-validation iteration {} / {} .').format(str(i + 1), str(N_iterations)))
            logging.debug(('Cross-validation iteration {} / {} .').format(str(i + 1), str(N_iterations)))
            random_seed = np.random.randint(5000)

            # Split into test and training set, where the percentage of each
            # label is maintained
            if any(clf in regressors for clf in param_grid['classifiers']):
                # We cannot do a stratified shuffle split with regression
                stratify = None
            else:
                if modus == 'singlelabel':
                    stratify = i_class_temp
                elif modus == 'multilabel':
                    # Create a stratification object from the labels
                    # Label = 0 means no label equals one
                    # Other label numbers refer to the label name that is 1
                    stratify = list()
                    for pnum in range(0, len(i_class[0])):
                        plabel = 0
                        for lnum, slabel in enumerate(i_class):
                            if slabel[pnum] == 1:
                                plabel = lnum + 1
                        stratify.append(plabel)

                    # Sklearn multiclass requires rows to be objects/patients
                    # i_class = i_class.reshape(i_class.shape[1], i_class.shape[0])
                    i_class_temp = np.zeros((i_class.shape[1], i_class.shape[0]))
                    for n_patient in range(0, i_class.shape[1]):
                        for n_label in range(0, i_class.shape[0]):
                            i_class_temp[n_patient, n_label] = i_class[n_label, n_patient]
                else:
                    raise ae.WORCKeyError(f'{modus} is not a valid modus!')

            if fixedsplits is None:
                # Use random split. Split per patient, not per sample
                unique_patient_IDs, unique_indices =\
                    np.unique(np.asarray(patient_IDs), return_index=True)
                if any(clf in regressors for clf in param_grid['classifiers']):
                    unique_stratify = None
                else:
                    unique_stratify = [stratify[i] for i in unique_indices]

                try:
                    unique_PID_train, indices_PID_test =\
                        train_test_split(unique_patient_IDs,
                                         test_size=test_size,
                                         random_state=random_seed,
                                         stratify=unique_stratify)
                except ValueError as e:
                    e = str(e) + ' Increase the size of your validation set.'
                    raise ae.WORCValueError(e)

                # Check for all IDs if they are in test or training
                indices_train = list()
                indices_test = list()
                patient_ID_train = list()
                patient_ID_test = list()
                for num, pid in enumerate(patient_IDs):
                    if pid in unique_PID_train:
                        indices_train.append(num)

                        # Make sure we get a unique ID
                        if pid in patient_ID_train:
                            n = 1
                            while str(pid + '_' + str(n)) in patient_ID_train:
                                n += 1
                            pid = str(pid + '_' + str(n))
                        patient_ID_train.append(pid)
                    else:
                        indices_test.append(num)

                        # Make sure we get a unique ID
                        if pid in patient_ID_test:
                            n = 1
                            while str(pid + '_' + str(n)) in patient_ID_test:
                                n += 1
                            pid = str(pid + '_' + str(n))
                        patient_ID_test.append(pid)

                # Split features and labels accordingly
                X_train = [image_features[i] for i in indices_train]
                X_test = [image_features[i] for i in indices_test]
                if modus == 'singlelabel':
                    Y_train = i_class_temp[indices_train]
                    Y_test = i_class_temp[indices_test]
                elif modus == 'multilabel':
                    Y_train = i_class_temp[indices_train, :]
                    Y_test = i_class_temp[indices_test, :]
                else:
                    raise ae.WORCKeyError(f'{modus} is not a valid modus!')

            else:
                # Use pre-defined splits
                indices = wb.col_values(i)
                indices = [int(j) for j in indices[1:]]  # First element is "Iteration x"
                train = indices[0:121]
                test = indices[121:]

                # Convert the numbers to the correct indices
                ind_train = list()
                for j in train:
                    success = False
                    for num, p in enumerate(patient_IDs):
                        if str(j).zfill(3) == p[0:3]:
                            ind_train.append(num)
                            success = True
                    if not success:
                        raise ae.WORCIOError("Patient " + str(j).zfill(3) + " is not included!")

                ind_test = list()
                for j in test:
                    success = False
                    for num, p in enumerate(patient_IDs):
                        if str(j).zfill(3) == p[0:3]:
                            ind_test.append(num)
                            success = True
                    if not success:
                        raise ae.WORCIOError("Patient " + str(j).zfill(3) + " is not included!")

                X_train = np.asarray(image_features)[ind_train].tolist()
                Y_train = np.asarray(i_class_temp)[ind_train].tolist()
                patient_ID_train = patient_IDs[ind_train]

                X_test = np.asarray(image_features)[ind_test].tolist()
                Y_test = np.asarray(i_class_temp)[ind_test].tolist()
                patient_ID_test = patient_IDs[ind_test]

            # Find best hyperparameters and construct classifier
            config['HyperOptimization']['use_fastr'] = use_fastr
            config['HyperOptimization']['fastr_plugin'] = fastr_plugin
            n_cores = config['General']['Joblib_ncores']
            trained_classifier = random_search_parameters(features=X_train,
                                                          labels=Y_train,
                                                          param_grid=param_grid,
                                                          n_cores=n_cores,
                                                          **config['HyperOptimization'])

            # Create an ensemble if required
            if ensemble['Use']:
                trained_classifier.create_ensemble(X_train, Y_train)

            # We only want to save the feature values and one label array
            X_train = [x[0] for x in X_train]
            X_test = [x[0] for x in X_test]

            temp_save_data = (trained_classifier, X_train, X_test, Y_train,
                              Y_test, patient_ID_train, patient_ID_test,
                              random_seed)

            save_data.append(temp_save_data)

            # Create a temporary save
            if tempsave:
                panda_labels = ['trained_classifier', 'X_train', 'X_test',
                                'Y_train', 'Y_test', 'config',
                                'patient_ID_train', 'patient_ID_test',
                                'random_seed']

                panda_data_temp =\
                    pd.Series([trained_classifier, X_train, X_test, Y_train,
                               Y_test, config, patient_ID_train,
                               patient_ID_test, random_seed],
                              index=panda_labels,
                              name='Constructed crossvalidation')

                panda_data = pd.DataFrame(panda_data_temp)
                n = 0
                filename = os.path.join(fastr.config.mounts['tmp'], 'GSout',
                                        'RS_' + str(i) + '.hdf5')
                while os.path.exists(filename):
                    n += 1
                    filename = os.path.join(fastr.config.mounts['tmp'],
                                            'GSout',
                                            'RS_' + str(i + n) + '.hdf5')

                if not os.path.exists(os.path.dirname(filename)):
                    os.makedirs(os.path.dirname(filename))

                panda_data.to_hdf(filename, 'SVMdata')
                del panda_data, panda_data_temp

        [classifiers, X_train_set, X_test_set, Y_train_set, Y_test_set,
         patient_ID_train_set, patient_ID_test_set, seed_set] =\
            zip(*save_data)

        panda_labels = ['classifiers', 'X_train', 'X_test', 'Y_train',
                        'Y_test', 'config', 'patient_ID_train',
                        'patient_ID_test', 'random_seed', 'feature_labels']

        panda_data_temp =\
            pd.Series([classifiers, X_train_set, X_test_set, Y_train_set,
                       Y_test_set, config, patient_ID_train_set,
                       patient_ID_test_set, seed_set, feature_labels],
                      index=panda_labels,
                      name='Constructed crossvalidation')

        if modus == 'singlelabel':
            i_name = ''.join(i_name)
        elif modus == 'multilabel':
            i_name = ','.join(i_name)

        classifier_labelss[i_name] = panda_data_temp

    panda_data = pd.DataFrame(classifier_labelss)

    return panda_data
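
# Hedged usage sketch (not from the WORC source): a typical crossval call.
# The config, label_data, image_features and param_grid objects are
# hypothetical placeholders built elsewhere in a WORC experiment.
#
#   panda_data = crossval(config, label_data, image_features,
#                         param_grid=param_grid, modus='singlelabel')
#   panda_data.to_hdf('classification.hdf5', 'SVMdata')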
def plot_SVM(prediction, label_data, label_type, show_plots=False,
             alpha=0.95, ensemble=False, verbose=True,
             ensemble_scoring=None, output='stats',
             modus='singlelabel'):
    '''
    Plot the output of a single binary estimator, e.g. an SVM.

    Parameters
    ----------
    prediction: pandas dataframe or string, mandatory
        output of trainclassifier function, either a pandas dataframe
        or a HDF5 file

    label_data: string, mandatory
        Contains the path referring to a .txt file containing the
        patient label(s) and value(s) to be used for learning. See the
        Github Wiki for the format.

    label_type: string, mandatory
        Name of the label to extract from the label data to test the
        estimator on.

    show_plots: Boolean, default False
        Determine whether matplotlib performance plots are made.

    alpha: float, default 0.95
        Significance of confidence intervals.

    ensemble: False, integer or 'Caruana'
        Determine whether an ensemble will be created. If so, either
        provide an integer to determine how many of the top performing
        classifiers should be in the ensemble, or use the string
        "Caruana" to use smart ensembling based on Caruana et al. 2004.

    verbose: boolean, default True
        Plot intermediate messages.

    ensemble_scoring: string, default None
        Metric to be used for evaluating the ensemble. If None, the
        option set in the prediction object will be used.

    output: string, default stats
        Determine which results are put out. If stats, the statistics
        of the estimator will be returned. If scores, the scores will
        be returned.

    Returns
    ----------
    Depending on the output parameter, the following outputs are
    returned:

    If output == 'stats':
    stats: dictionary
        Contains the confidence intervals of the performance metrics
        and the number of times each patient was classified correctly
        or incorrectly.

    If output == 'scores':
    y_truths: list
        Contains the true label for each object.

    y_scores: list
        Contains the score (e.g. posterior) for each object.

    y_predictions: list
        Contains the predicted label for each object.

    PIDs: list
        Contains the patient ID/name for each object.

    '''
    # Load the prediction object if it's an hdf5 file
    if type(prediction) is not pd.core.frame.DataFrame:
        if os.path.isfile(prediction):
            prediction = pd.read_hdf(prediction)
        else:
            raise ae.WORCIOError(('{} is not an existing file!').format(str(prediction)))

    # Select the estimator from the pandas dataframe to use
    keys = prediction.keys()
    SVMs = list()
    if label_type is None:
        label_type = keys[0]

    # Load the label data
    if type(label_data) is not dict:
        if os.path.isfile(label_data):
            if type(label_type) is not list:
                # Singlelabel: convert to list
                label_type = [[label_type]]
            label_data = lp.load_labels(label_data, label_type)

    patient_IDs = label_data['patient_IDs']
    labels = label_data['label']

    if type(label_type) is list:
        # FIXME: Support for multiple label types not supported yet.
        print('[WORC Warning] Support for multiple label types not supported yet. Taking first label for plot_SVM.')
        label_type = keys[0]

    # Extract the estimators, features and labels
    SVMs = prediction[label_type]['classifiers']
    regression = is_regressor(SVMs[0].best_estimator_)
    Y_test = prediction[label_type]['Y_test']
    X_test = prediction[label_type]['X_test']
    X_train = prediction[label_type]['X_train']
    Y_train = prediction[label_type]['Y_train']
    feature_labels = prediction[label_type]['feature_labels']

    # Create lists for performance measures
    sensitivity = list()
    specificity = list()
    precision = list()
    accuracy = list()
    auc = list()
    f1_score_list = list()
    patient_classification_list = dict()

    if output in ['scores', 'decision']:
        # Keep track of all ground truths and scores
        y_truths = list()
        y_scores = list()
        y_predictions = list()
        PIDs = list()

    # Loop over the test sets, which probably correspond with
    # cross-validation iterations
    for i in range(0, len(Y_test)):
        print("\n")
        print(("Cross validation {} / {}.").format(str(i + 1), str(len(Y_test))))
        test_patient_IDs = prediction[label_type]['patient_ID_test'][i]
        train_patient_IDs = prediction[label_type]['patient_ID_train'][i]
        X_test_temp = X_test[i]
        X_train_temp = X_train[i]
        Y_train_temp = Y_train[i]
        Y_test_temp = Y_test[i]
        test_indices = list()

        # Check which patients are in the test set.
        for i_ID in test_patient_IDs:
            test_indices.append(np.where(patient_IDs == i_ID)[0][0])

            # Initiate counting how many times a patient is classified correctly
            if i_ID not in patient_classification_list:
                patient_classification_list[i_ID] = dict()
                patient_classification_list[i_ID]['N_test'] = 0
                patient_classification_list[i_ID]['N_correct'] = 0
                patient_classification_list[i_ID]['N_wrong'] = 0

            patient_classification_list[i_ID]['N_test'] += 1

        # Extract ground truth
        y_truth = Y_test_temp

        # If requested, first let the SearchCV object create an ensemble
        if ensemble:
            # NOTE: Added for backwards compatibility
            if not hasattr(SVMs[i], 'cv_iter'):
                cv_iter = list(SVMs[i].cv.split(X_train_temp, Y_train_temp))
                SVMs[i].cv_iter = cv_iter

            # Create the ensemble
            X_train_temp = [(x, feature_labels) for x in X_train_temp]
            SVMs[i].create_ensemble(X_train_temp, Y_train_temp,
                                    method=ensemble, verbose=verbose,
                                    scoring=ensemble_scoring)

        # Create prediction
        y_prediction = SVMs[i].predict(X_test_temp)

        if regression:
            y_score = y_prediction
        else:
            y_score = SVMs[i].predict_proba(X_test_temp)[:, 1]

        print("Truth: " + str(y_truth))
        print("Prediction: " + str(y_prediction))

        # Add if patient was classified correctly or not to counting
        for i_truth, i_predict, i_test_ID in zip(y_truth, y_prediction, test_patient_IDs):
            if modus == 'multilabel':
                success = (i_truth == i_predict).all()
            else:
                success = i_truth == i_predict

            if success:
                patient_classification_list[i_test_ID]['N_correct'] += 1
            else:
                patient_classification_list[i_test_ID]['N_wrong'] += 1

        if output in ['decision', 'scores']:
            # Output the posteriors
            y_scores.append(y_score)
            y_truths.append(y_truth)
            y_predictions.append(y_prediction)
            PIDs.append(test_patient_IDs)

        elif output == 'stats':
            # Compute statistics
            # Compute confusion matrix and use for sensitivity/specificity
            if modus == 'singlelabel':
                # Compute singlelabel performance metrics
                if not regression:
                    accuracy_temp, sensitivity_temp, specificity_temp,\
                        precision_temp, f1_score_temp, auc_temp =\
                        metrics.performance_singlelabel(y_truth,
                                                        y_prediction,
                                                        y_score,
                                                        regression)
                else:
                    r2score, MSE, coefICC, PearsonC, PearsonP, SpearmanC,\
                        SpearmanP =\
                        metrics.performance_singlelabel(y_truth,
                                                        y_prediction,
                                                        y_score,
                                                        regression)

            elif modus == 'multilabel':
                # Convert class objects to single label per patient
                y_truth_temp = list()
                y_prediction_temp = list()
                for yt, yp in zip(y_truth, y_prediction):
                    label = np.where(yt == 1)
                    if len(label) > 1:
                        raise ae.WORCNotImplementedError('Multiclass classification evaluation is not supported in WORC.')

                    y_truth_temp.append(label[0][0])
                    label = np.where(yp == 1)
                    y_prediction_temp.append(label[0][0])

                y_truth = y_truth_temp
                y_prediction = y_prediction_temp

                # Compute multilabel performance metrics
                accuracy_temp, sensitivity_temp, specificity_temp,\
                    precision_temp, f1_score_temp, auc_temp =\
                    metrics.performance_multilabel(y_truth,
                                                   y_prediction,
                                                   y_score)

            else:
                raise ae.WORCKeyError(f'{modus} is not a valid modus!')

            # Print AUC to keep you up to date
            print('AUC: ' + str(auc_temp))

            # Append performance to lists for all cross-validations
            accuracy.append(accuracy_temp)
            sensitivity.append(sensitivity_temp)
            specificity.append(specificity_temp)
            auc.append(auc_temp)
            f1_score_list.append(f1_score_temp)
            precision.append(precision_temp)

    if output in ['scores', 'decision']:
        # Return the scores and true values of all patients
        return y_truths, y_scores, y_predictions, PIDs
    elif output == 'stats':
        # Compute statistics
        # Extract sample size
        N_1 = float(len(train_patient_IDs))
        N_2 = float(len(test_patient_IDs))

        # Compute alpha confidence intervals
        stats = dict()
        stats["Accuracy 95%:"] = str(compute_CI.compute_confidence(accuracy, N_1, N_2, alpha))
        stats["AUC 95%:"] = str(compute_CI.compute_confidence(auc, N_1, N_2, alpha))
        stats["F1-score 95%:"] = str(compute_CI.compute_confidence(f1_score_list, N_1, N_2, alpha))
        stats["Precision 95%:"] = str(compute_CI.compute_confidence(precision, N_1, N_2, alpha))
        stats["Sensitivity 95%:"] = str(compute_CI.compute_confidence(sensitivity, N_1, N_2, alpha))
        stats["Specificity 95%:"] = str(compute_CI.compute_confidence(specificity, N_1, N_2, alpha))

        print("Accuracy 95%: " + stats["Accuracy 95%:"])
        print("AUC 95%: " + stats["AUC 95%:"])
        print("F1-score 95%: " + stats["F1-score 95%:"])
        print("Precision 95%: " + stats["Precision 95%:"])
        print("Sensitivity 95%: " + stats["Sensitivity 95%:"])
        print("Specificity 95%: " + stats["Specificity 95%:"])

        # Extract statistics on how often patients got classified correctly
        alwaysright = dict()
        alwayswrong = dict()
        percentages = dict()
        for i_ID in patient_classification_list:
            percentage_right = patient_classification_list[i_ID]['N_correct'] / float(patient_classification_list[i_ID]['N_test'])

            if i_ID in patient_IDs:
                label = labels[0][np.where(i_ID == patient_IDs)]
            else:
                # Multiple instances of one patient
                label = labels[0][np.where(i_ID.split('_')[0] == patient_IDs)]

            label = label[0][0]
            percentages[i_ID] = str(label) + ': ' + str(round(percentage_right, 2) * 100) + '%'
            if percentage_right == 1.0:
                alwaysright[i_ID] = label
                print(("Always Right: {}, label {}").format(i_ID, label))
            elif percentage_right == 0:
                alwayswrong[i_ID] = label
                print(("Always Wrong: {}, label {}").format(i_ID, label))

        stats["Always right"] = alwaysright
        stats["Always wrong"] = alwayswrong
        stats['Percentages'] = percentages

        if show_plots:
            # Plot some characteristics in boxplots
            import matplotlib.pyplot as plt

            plt.figure()
            plt.boxplot(accuracy)
            plt.ylim([-0.05, 1.05])
            plt.ylabel('Accuracy')
            plt.tick_params(
                axis='x',           # changes apply to the x-axis
                which='both',       # both major and minor ticks are affected
                bottom='off',       # ticks along the bottom edge are off
                top='off',          # ticks along the top edge are off
                labelbottom='off')  # labels along the bottom edge are off
            plt.tight_layout()
            plt.show()

            plt.figure()
            plt.boxplot(auc)
            plt.ylim([-0.05, 1.05])
            plt.ylabel('AUC')
            plt.tick_params(
                axis='x',           # changes apply to the x-axis
                which='both',       # both major and minor ticks are affected
                bottom='off',       # ticks along the bottom edge are off
                top='off',          # ticks along the top edge are off
                labelbottom='off')  # labels along the bottom edge are off
            plt.tight_layout()
            plt.show()

            plt.figure()
            plt.boxplot(precision)
            plt.ylim([-0.05, 1.05])
            plt.ylabel('Precision')
            plt.tick_params(
                axis='x',           # changes apply to the x-axis
                which='both',       # both major and minor ticks are affected
                bottom='off',       # ticks along the bottom edge are off
                top='off',          # ticks along the top edge are off
                labelbottom='off')  # labels along the bottom edge are off
            plt.tight_layout()
            plt.show()

            plt.figure()
            plt.boxplot(sensitivity)
            plt.ylim([-0.05, 1.05])
            plt.ylabel('Sensitivity')
            plt.tick_params(
                axis='x',           # changes apply to the x-axis
                which='both',       # both major and minor ticks are affected
                bottom='off',       # ticks along the bottom edge are off
                top='off',          # ticks along the top edge are off
                labelbottom='off')  # labels along the bottom edge are off
            plt.tight_layout()
            plt.show()

            plt.figure()
            plt.boxplot(specificity)
            plt.ylim([-0.05, 1.05])
            plt.ylabel('Specificity')
            plt.tick_params(
                axis='x',           # changes apply to the x-axis
                which='both',       # both major and minor ticks are affected
                bottom='off',       # ticks along the bottom edge are off
                top='off',          # ticks along the top edge are off
                labelbottom='off')  # labels along the bottom edge are off
            plt.tight_layout()
            plt.show()

        return stats
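
# A minimal sketch of the multilabel-to-singlelabel conversion used in the
# stats branch of plot_SVM above: each one-hot row is reduced to its class
# index. The toy truth vector below is made up.
def _multilabel_to_class_example():
    import numpy as np

    yt = np.asarray([0, 1, 0])   # one-hot truth for one patient
    label = np.where(yt == 1)
    return int(label[0][0])       # -> 1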
def plot_estimator_performance(prediction, label_data, label_type, crossval_type=None, alpha=0.95, ensemble=None, verbose=True, ensemble_scoring=None, output=None, modus=None, thresholds=None, survival=False, shuffle_estimators=False, bootstrap=None, bootstrap_N=None, overfit_scaler=None): """Plot the output of a single estimator, e.g. a SVM. Parameters ---------- prediction: pandas dataframe or string, mandatory output of trainclassifier function, either a pandas dataframe or a HDF5 file label_data: string, mandatory Contains the path referring to a .txt file containing the patient label(s) and value(s) to be used for learning. See the Github Wiki for the format. label_type: string, mandatory Name of the label to extract from the label data to test the estimator on. alpha: float, default 0.95 Significance of confidence intervals. ensemble: False, integer or 'Caruana' Determine whether an ensemble will be created. If so, either provide an integer to determine how many of the top performing classifiers should be in the ensemble, or use the string "Caruana" to use smart ensembling based on Caruana et al. 2004. verbose: boolean, default True Plot intermedate messages. ensemble_scoring: string, default None Metric to be used for evaluating the ensemble. If None, the option set in the prediction object will be used. output: string, default stats Determine which results are put out. If stats, the statistics of the estimator will be returned. If scores, the scores will be returned. thresholds: list of integer(s), default None If None, use default threshold of sklearn (0.5) on posteriors to converge to a binary prediction. If one integer is provided, use that one. If two integers are provided, posterior < thresh[0] = 0, posterior > thresh[1] = 1. Returns ---------- Depending on the output parameters, the following outputs are returned: If output == 'stats': stats: dictionary Contains the confidence intervals of the performance metrics and the number of times each patient was classifier correctly or incorrectly. If output == 'scores': y_truths: list Contains the true label for each object. y_scores: list Contains the score (e.g. posterior) for each object. y_predictions: list Contains the predicted label for each object. pids: list Contains the patient ID/name for each object. """ # Load the prediction object if it's a hdf5 file if type(prediction) is not pd.core.frame.DataFrame: if os.path.isfile(prediction): prediction = pd.read_hdf(prediction) else: raise ae.WORCIOError( ('{} is not an existing file!').format(str(prediction))) # Select the estimator from the pandas dataframe to use keys = prediction.keys() if label_type is None: label_type = keys[0] # Load the label data if type(label_data) is not dict: if os.path.isfile(label_data): if type(label_type) is not list: # Singlelabel: convert to list label_type = [[label_type]] label_data = lp.load_labels(label_data, label_type) else: raise ae.WORCValueError( f"Label data {label_data} incorrect: not a dictionary, or file does not exist." ) n_labels = len(label_type) patient_IDs = label_data['patient_IDs'] labels = label_data['label'] if type(label_type) is list: # FIXME: Support for multiple label types not supported yet. print( '[WORC Warning] Support for multiple label types not supported yet. Taking first label for plot_estimator_performance.' 
) label_type = keys[0] # Extract the estimators, features and labels regression = is_regressor( prediction[label_type]['classifiers'][0].best_estimator_) feature_labels = prediction[label_type]['feature_labels'] # Get some configuration variables if present in the prediction config = prediction[label_type].config if ensemble is None: ensemble = int(config['Ensemble']['Use']) if modus is None: modus = config['Labels']['modus'] if crossval_type is None: crossval_type = config['CrossValidation']['Type'] if bootstrap is None: bootstrap = config['Bootstrap']['Use'] if bootstrap_N is None: bootstrap_N = int(config['Bootstrap']['N_iterations']) if overfit_scaler is None: overfit_scaler = config['Evaluation']['OverfitScaler'] ensemble_metric = config['Ensemble']['Metric'] # Create lists for performance measures if not regression: sensitivity = list() specificity = list() precision = list() npv = list() accuracy = list() bca = list() auc = list() f1_score_list = list() if modus == 'multilabel': acc_av = list() # Also add scoring measures for all single label scores sensitivity_single = [list() for j in n_labels] specificity_single = [list() for j in n_labels] precision_single = [list() for j in n_labels] npv_single = [list() for j in n_labels] accuracy_single = [list() for j in n_labels] bca_single = [list() for j in n_labels] auc_single = [list() for j in n_labels] f1_score_list_single = [list() for j in n_labels] else: r2score = list() MSE = list() coefICC = list() PearsonC = list() PearsonP = list() SpearmanC = list() SpearmanP = list() patient_classification_list = dict() percentages_selected = list() if output in ['scores', 'decision'] or crossval_type == 'LOO': # Keep track of all groundth truths and scores y_truths = list() y_scores = list() y_predictions = list() pids = list() # Extract sample size N_1 = float(len(prediction[label_type]['patient_ID_train'][0])) N_2 = float(len(prediction[label_type]['patient_ID_test'][0])) # Convert tuples to lists if required if type(prediction[label_type]['X_test']) is tuple: prediction[label_type]['X_test'] = list( prediction[label_type]['X_test']) prediction[label_type]['X_train'] = list( prediction[label_type]['X_train']) prediction[label_type]['Y_train'] = list( prediction[label_type]['Y_train']) prediction[label_type]['Y_test'] = list( prediction[label_type]['Y_test']) prediction[label_type]['patient_ID_test'] = list( prediction[label_type]['patient_ID_test']) prediction[label_type]['patient_ID_train'] = list( prediction[label_type]['patient_ID_train']) prediction[label_type]['classifiers'] = list( prediction[label_type]['classifiers']) # Loop over the test sets, which correspond to cross-validation # or bootstrapping iterations n_iter = len(prediction[label_type]['Y_test']) if bootstrap: iterobject = range(0, bootstrap_N) else: iterobject = range(0, n_iter) for i in iterobject: print("\n") if bootstrap: print(f"Bootstrap {i + 1} / {bootstrap_N}.") else: print(f"Cross-validation {i + 1} / {n_iter}.") test_indices = list() # When bootstrapping, there is only a single train/test set. 
if bootstrap: if i == 0: X_test_temp_or = prediction[label_type]['X_test'][0] X_train_temp = prediction[label_type]['X_train'][0] Y_train_temp = prediction[label_type]['Y_train'][0] Y_test_temp_or = prediction[label_type]['Y_test'][0] test_patient_IDs_or = prediction[label_type][ 'patient_ID_test'][0] train_patient_IDs = prediction[label_type]['patient_ID_train'][ 0] fitted_model = prediction[label_type]['classifiers'][0] # Objects required for first iteration test_patient_IDs = test_patient_IDs_or[:] X_test_temp = X_test_temp_or[:] Y_test_temp = Y_test_temp_or[:] else: X_test_temp = prediction[label_type]['X_test'][i] X_train_temp = prediction[label_type]['X_train'][i] Y_train_temp = prediction[label_type]['Y_train'][i] Y_test_temp = prediction[label_type]['Y_test'][i] test_patient_IDs = prediction[label_type]['patient_ID_test'][i] train_patient_IDs = prediction[label_type]['patient_ID_train'][i] fitted_model = prediction[label_type]['classifiers'][i] # Check which patients are in the test set. if output == 'stats' and crossval_type != 'LOO': for i_ID in test_patient_IDs: # Initiate counting how many times a patient is classified correctly if i_ID not in patient_classification_list: patient_classification_list[i_ID] = dict() patient_classification_list[i_ID]['N_test'] = 0 patient_classification_list[i_ID]['N_correct'] = 0 patient_classification_list[i_ID]['N_wrong'] = 0 patient_classification_list[i_ID]['N_test'] += 1 # Check if this is exactly the label of the patient within the label file if i_ID not in patient_IDs: print( f'[WORC WARNING] Patient {i_ID} is not found the patient labels, removing underscore.' ) i_ID = i_ID.split("_")[0] if i_ID not in patient_IDs: print( f'[WORC WARNING] Did not help, excluding patient {i_ID}.' ) continue test_indices.append(np.where(patient_IDs == i_ID)[0][0]) # Extract ground truth y_truth = Y_test_temp # If required, shuffle estimators for "Random" ensembling if shuffle_estimators: # Randomly shuffle the estimators print('Shuffling estimators for random ensembling.') shuffle(fitted_model.cv_results_['params']) # If requested, first let the SearchCV object create an ensemble if bootstrap and i > 0: # For bootstrapping, only do this at the first iteration pass elif not fitted_model.ensemble: # If required, rank according to generalization score instead of mean_validation_score if ensemble_metric == 'generalization': print('Using generalization score for estimator ranking.') indices = fitted_model.cv_results_['rank_generalization_score'] fitted_model.cv_results_['params'] = [ fitted_model.cv_results_['params'][i] for i in indices[::-1] ] elif ensemble_metric != 'Default': raise ae.WORCKeyError( f'Metric {ensemble_metric} is not known: use Default or generalization.' 
            # NOTE: Added for backwards compatibility
            if not hasattr(fitted_model, 'cv_iter'):
                cv_iter = list(fitted_model.cv.split(X_train_temp, Y_train_temp))
                fitted_model.cv_iter = cv_iter

            # Create the ensemble
            X_train_temp = [(x, feature_labels) for x in X_train_temp]
            fitted_model.create_ensemble(X_train_temp, Y_train_temp,
                                         method=ensemble, verbose=verbose,
                                         scoring=ensemble_scoring,
                                         overfit_scaler=overfit_scaler)

        # If bootstrap, generate a bootstrapped sample
        if bootstrap and i > 0:
            y_truth, y_prediction, y_score, test_patient_IDs =\
                resample(y_truth_all, y_prediction_all,
                         y_score_all, test_patient_IDs_or)
        else:
            # Create prediction
            y_prediction = fitted_model.predict(X_test_temp)

            if regression:
                y_score = y_prediction
            elif modus == 'multilabel':
                y_score = fitted_model.predict_proba(X_test_temp)
            else:
                y_score = fitted_model.predict_proba(X_test_temp)[:, 1]

            # Create a new binary score based on the thresholds if given
            if thresholds is not None:
                if len(thresholds) == 1:
                    y_prediction = y_score >= thresholds[0]
                elif len(thresholds) == 2:
                    # X_train_temp = [x[0] for x in X_train_temp]
                    y_score_temp = list()
                    y_prediction_temp = list()
                    y_truth_temp = list()
                    test_patient_IDs_temp = list()

                    thresholds_val = fit_thresholds(thresholds, fitted_model,
                                                    X_train_temp, Y_train_temp,
                                                    ensemble, ensemble_scoring)
                    for pnum in range(len(y_score)):
                        if y_score[pnum] <= thresholds_val[0] or y_score[pnum] > thresholds_val[1]:
                            y_score_temp.append(y_score[pnum])
                            y_prediction_temp.append(y_prediction[pnum])
                            y_truth_temp.append(y_truth[pnum])
                            test_patient_IDs_temp.append(test_patient_IDs[pnum])

                    perc = float(len(y_prediction_temp)) / float(len(y_prediction))
                    percentages_selected.append(perc)
                    print(f"Selected {len(y_prediction_temp)} from {len(y_prediction)} ({perc * 100:.1f}%) patients using two thresholds.")
                    y_score = y_score_temp
                    y_prediction = y_prediction_temp
                    y_truth = y_truth_temp
                    test_patient_IDs = test_patient_IDs_temp
                else:
                    raise ae.WORCValueError(
                        f"Need None, one or two thresholds on the posterior; got {len(thresholds)}.")
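            # Illustration of the two-threshold selection above (hypothetical
            # numbers): with fitted values thresholds_val = [0.3, 0.7], a
            # patient is kept only if its posterior falls outside (0.3, 0.7],
            # i.e. only the confident predictions survive; for scores
            # [0.1, 0.5, 0.9], the patients with 0.1 and 0.9 are kept.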
            # If all scores are NaN, the classifier cannot produce
            # probabilities, so fall back to the hard predictions
            if np.sum(np.isnan(y_score)) == len(y_prediction):
                print('[WORC Warning] All scores NaN, replacing with prediction.')
                y_score = y_prediction

            if bootstrap and i == 0:
                # Save objects for re-use in later bootstrap iterations
                y_truth_all = y_truth[:]
                y_prediction_all = y_prediction[:]
                y_score_all = y_score[:]

        print("Truth: " + str(y_truth))
        print("Prediction: " + str(y_prediction))
        print("Score: " + str(y_score))

        if output == 'stats' and crossval_type != 'LOO':
            # Count whether each patient was classified correctly or not
            for i_truth, i_predict, i_test_ID in zip(y_truth, y_prediction,
                                                     test_patient_IDs):
                if modus == 'multilabel':
                    success = (i_truth == i_predict).all()
                else:
                    success = i_truth == i_predict

                if success:
                    patient_classification_list[i_test_ID]['N_correct'] += 1
                else:
                    patient_classification_list[i_test_ID]['N_wrong'] += 1

        if output in ['decision', 'scores'] or crossval_type == 'LOO':
            # Output the posteriors
            y_scores.append(y_score)
            y_truths.append(y_truth)
            y_predictions.append(y_prediction)
            pids.append(test_patient_IDs)

        elif output == 'stats':
            # Compute statistics
            print('Computing performance statistics.')
            # Compute confusion matrix and use for sensitivity/specificity
            performances = compute_statistics(y_truth, y_score, y_prediction,
                                              modus, regression)

            # Print AUC to keep you up to date
            if not regression:
                if modus == 'singlelabel':
                    accuracy_temp, bca_temp, sensitivity_temp,\
                        specificity_temp, precision_temp, npv_temp,\
                        f1_score_temp, auc_temp = performances
                else:
                    accuracy_temp, sensitivity_temp,\
                        specificity_temp, precision_temp, npv_temp,\
                        f1_score_temp, auc_temp, acc_av_temp,\
                        accuracy_temp_single,\
                        bca_temp_single, sensitivity_temp_single,\
                        specificity_temp_single, precision_temp_single,\
                        npv_temp_single, f1_score_temp_single,\
                        auc_temp_single = performances
                    # No overall BCA is returned for multilabel, so store NaN
                    # to keep the lists aligned
                    bca_temp = np.nan

                print('AUC: ' + str(auc_temp))

                # Append performance to lists for all cross-validations
                accuracy.append(accuracy_temp)
                bca.append(bca_temp)
                sensitivity.append(sensitivity_temp)
                specificity.append(specificity_temp)
                auc.append(auc_temp)
                f1_score_list.append(f1_score_temp)
                precision.append(precision_temp)
                npv.append(npv_temp)

                if modus == 'multilabel':
                    acc_av.append(acc_av_temp)
                    for j in n_labels:
                        accuracy_single[j].append(accuracy_temp_single[j])
                        bca_single[j].append(bca_temp_single[j])
                        sensitivity_single[j].append(sensitivity_temp_single[j])
                        specificity_single[j].append(specificity_temp_single[j])
                        auc_single[j].append(auc_temp_single[j])
                        f1_score_list_single[j].append(f1_score_temp_single[j])
                        precision_single[j].append(precision_temp_single[j])
                        npv_single[j].append(npv_temp_single[j])
            else:
                r2score_temp, MSE_temp, coefICC_temp, PearsonC_temp,\
                    PearsonP_temp, SpearmanC_temp,\
                    SpearmanP_temp = performances
                print('R2 Score: ' + str(r2score_temp))

                r2score.append(r2score_temp)
                MSE.append(MSE_temp)
                coefICC.append(coefICC_temp)
                PearsonC.append(PearsonC_temp)
                PearsonP.append(PearsonP_temp)
                SpearmanC.append(SpearmanC_temp)
                SpearmanP.append(SpearmanP_temp)

        # Delete some objects to save memory in cross-validation
        if not bootstrap:
            del fitted_model, X_test_temp, X_train_temp, Y_train_temp
            del Y_test_temp, test_patient_IDs, train_patient_IDs
            prediction[label_type]['X_test'][i] = None
            prediction[label_type]['X_train'][i] = None
            prediction[label_type]['Y_train'][i] = None
            prediction[label_type]['Y_test'][i] = None
            prediction[label_type]['patient_ID_test'][i] = None
            prediction[label_type]['patient_ID_train'][i] = None
            prediction[label_type]['classifiers'][i] = None

    if output in ['scores', 'decision']:
        # Return the scores and true values of all patients
        return y_truths, y_scores, y_predictions, pids
    elif output == 'stats':
        # Compute statistics; note that from here on, the name `output` is
        # reused for the dictionary of results
        stats = dict()
        output = dict()
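        # The branches below report each metric either as a single value (LOO)
        # or as a mean followed by its confidence interval; an illustrative
        # (not actual) example of a resulting entry:
        #
        #     stats["AUC 95%:"] = "0.81 (0.74, 0.88)"
        #
        # where the interval comes from compute_confidence(auc, N_1, N_2,
        # alpha), with N_1 and N_2 the train and test sample sizes.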
        if crossval_type == 'LOO':
            performances = compute_statistics(y_truths, y_scores,
                                              y_predictions, modus,
                                              regression)
            if not regression:
                metric_names_single = ['Accuracy', 'BCA', 'Sensitivity',
                                       'Specificity', 'Precision', 'NPV',
                                       'F1-score', 'AUC']
                if modus == 'singlelabel':
                    metric_names = metric_names_single
                elif modus == 'multilabel':
                    metric_names_multi = ['Accuracy', 'Sensitivity',
                                          'Specificity', 'Precision', 'NPV',
                                          'F1-score', 'AUC',
                                          'Average Accuracy']
                    metric_names = metric_names_multi + metric_names_single
            else:
                # Regression
                metric_names = ['R2-score', 'MSE', 'ICC', 'PearsonC',
                                'PearsonP', 'SpearmanC', 'SpearmanP']

            # Put all metrics with their names in the statistics dict
            for k, v in zip(metric_names, performances):
                stats[k] = str(v)

            if thresholds is not None:
                if len(thresholds) == 2:
                    # Compute the percentage of patients that was selected
                    stats["Percentage Selected"] = str(percentages_selected[0])

            output['Statistics'] = stats
        else:
            # Compute alpha confidence intervals (CIs)
            # FIXME: multilabel performance per single label not included
            # FIXME: multilabel not working in bootstrap
            # FIXME: bootstrap not done in regression
            if not regression:
                metric_names_single = ['Accuracy', 'BCA', 'Sensitivity',
                                       'Specificity', 'Precision', 'NPV',
                                       'F1-score', 'AUC']
                if bootstrap:
                    # Compute the performance on the real test set once
                    X_test_temp = prediction[label_type]['X_test'][0]
                    y_truth = prediction[label_type]['Y_test'][0]
                    y_prediction = fitted_model.predict(X_test_temp)
                    y_score = fitted_model.predict_proba(X_test_temp)[:, 1]

                    performances_test =\
                        metrics.performance_singlelabel(y_truth, y_prediction,
                                                        y_score, regression)

                    # Aggregate the bootstrapped performances
                    performances_bootstrapped =\
                        [accuracy, bca, sensitivity, specificity, precision,
                         npv, f1_score_list, auc]

                    # Compute confidence intervals for all metrics
                    for p in range(len(metric_names_single)):
                        k = metric_names_single[p] + ' 95%'
                        perf = performances_bootstrapped[p]
                        perf_test = performances_test[p]
                        stats[k] = f"{perf_test} {str(compute_confidence_bootstrap(perf, perf_test, N_1, alpha))}"
                else:
                    stats["Accuracy 95%:"] = f"{np.nanmean(accuracy)} {str(compute_confidence(accuracy, N_1, N_2, alpha))}"
                    stats["BCA 95%:"] = f"{np.nanmean(bca)} {str(compute_confidence(bca, N_1, N_2, alpha))}"
                    stats["AUC 95%:"] = f"{np.nanmean(auc)} {str(compute_confidence(auc, N_1, N_2, alpha))}"
                    stats["F1-score 95%:"] = f"{np.nanmean(f1_score_list)} {str(compute_confidence(f1_score_list, N_1, N_2, alpha))}"
                    stats["Precision 95%:"] = f"{np.nanmean(precision)} {str(compute_confidence(precision, N_1, N_2, alpha))}"
                    stats["NPV 95%:"] = f"{np.nanmean(npv)} {str(compute_confidence(npv, N_1, N_2, alpha))}"
                    stats["Sensitivity 95%:"] = f"{np.nanmean(sensitivity)} {str(compute_confidence(sensitivity, N_1, N_2, alpha))}"
                    stats["Specificity 95%:"] = f"{np.nanmean(specificity)} {str(compute_confidence(specificity, N_1, N_2, alpha))}"

                    if modus == 'multilabel':
                        stats["Average Accuracy 95%:"] = f"{np.nanmean(acc_av)} {str(compute_confidence(acc_av, N_1, N_2, alpha))}"

                if thresholds is not None:
                    if len(thresholds) == 2:
                        # Compute the percentage of patients that was selected
                        stats["Percentage Selected 95%:"] = f"{np.nanmean(percentages_selected)} {str(compute_confidence(percentages_selected, N_1, N_2, alpha))}"

                # Extract statistics on how often patients were classified
                # correctly
                rankings = dict()
                alwaysright = dict()
                alwayswrong = dict()
                percentages = dict()
                timesintestset = dict()
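                # Sketch of the bookkeeping consumed by the loop below
                # (illustrative values): patient_classification_list maps each
                # patient ID to its counts over all test sets, e.g.
                #
                #     {'patient_007': {'N_test': 12, 'N_correct': 9, 'N_wrong': 3}}
                #
                # giving a per-patient percentage correct of 75%.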
                for i_ID in patient_classification_list:
                    percentage_right =\
                        patient_classification_list[i_ID]['N_correct'] /\
                        float(patient_classification_list[i_ID]['N_test'])

                    if i_ID in patient_IDs:
                        label = labels[0][np.where(i_ID == patient_IDs)]
                    else:
                        # Multiple instances of one patient
                        label = labels[0][np.where(i_ID.split('_')[0] == patient_IDs)]

                    label = label[0][0]
                    percentages[i_ID] = f"{label}: {round(percentage_right * 100, 1)}%"
                    if percentage_right == 1.0:
                        alwaysright[i_ID] = label
                        print(f"Always Right: {i_ID}, label {label}.")
                    elif percentage_right == 0:
                        alwayswrong[i_ID] = label
                        print(f"Always Wrong: {i_ID}, label {label}.")

                    timesintestset[i_ID] =\
                        patient_classification_list[i_ID]['N_test']

                rankings["Always right"] = alwaysright
                rankings["Always wrong"] = alwayswrong
                rankings['Percentages'] = percentages
                rankings['timesintestset'] = timesintestset

                output['Rankings'] = rankings

            else:
                # Regression
                stats['R2-score 95%:'] = f"{np.nanmean(r2score)} {str(compute_confidence(r2score, N_1, N_2, alpha))}"
                stats['MSE 95%:'] = f"{np.nanmean(MSE)} {str(compute_confidence(MSE, N_1, N_2, alpha))}"
                stats['ICC 95%:'] = f"{np.nanmean(coefICC)} {str(compute_confidence(coefICC, N_1, N_2, alpha))}"
                stats['PearsonC 95%:'] = f"{np.nanmean(PearsonC)} {str(compute_confidence(PearsonC, N_1, N_2, alpha))}"
                stats['PearsonP 95%:'] = f"{np.nanmean(PearsonP)} {str(compute_confidence(PearsonP, N_1, N_2, alpha))}"
                stats['SpearmanC 95%:'] = f"{np.nanmean(SpearmanC)} {str(compute_confidence(SpearmanC, N_1, N_2, alpha))}"
                stats['SpearmanP 95%:'] = f"{np.nanmean(SpearmanP)} {str(compute_confidence(SpearmanP, N_1, N_2, alpha))}"

        # Print all CIs and add to output
        stats = OrderedDict(sorted(stats.items()))
        for k, v in stats.items():
            print(f"{k} : {v}.")

        output['Statistics'] = stats

    return output
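# Example of consuming the returned output (illustrative sketch; `results`
# stands for the dict returned above when called with output='stats' and a
# non-LOO cross-validation):
#
#     stats = results['Statistics']
#     print(stats['AUC 95%:'])             # e.g. "0.81 (0.74, 0.88)"
#     rankings = results.get('Rankings')   # per-patient classification info
#     if rankings is not None:
#         print(rankings['Always wrong'])  # patients misclassified in every test set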