def postprocess_relevance(self, *args):
    """
    Bring relevance maps back into the shape of the original signal.

    Each array is first passed through helpers.arrays_to_numpy. The two
    trailing dummy axes are then selected explicitly (index 0 on each) to
    remove them, and the result is handed to
    self.input_shape_processor.backward.

    Returns
    -------
    tuple with one post-processed relevance array per positional argument.
    """
    cpu_maps = helpers.arrays_to_numpy(*args)
    restore_shape = self.input_shape_processor.backward
    # r[..., 0, 0] explicitly picks the previously added dummy axes for removal
    return tuple(restore_shape(r[..., 0, 0]) for r in cpu_maps)
def preprocess_data(self, x_train, x_val, x_test, y_train, y_val, y_test):
    """
    Prepare data and labels as input to the model.

    No reshaping is done in this method itself; the six arrays are only
    handed to helpers.arrays_to_cupy (if self.use_gpu is set) or to
    helpers.arrays_to_numpy (otherwise).

    Returns
    -------
    the converted arrays, in the order
    (x_train, x_val, x_test, y_train, y_val, y_test)
    """
    arrays = (x_train, x_val, x_test, y_train, y_val, y_test)

    if not self.use_gpu:
        # CPU execution: make sure the data is available to the CPU
        return helpers.arrays_to_numpy(*arrays)

    # GPU execution is desired/possible: move the data to the GPU
    return helpers.arrays_to_cupy(*arrays)
def preprocess_data(self, x_train, x_val, x_test, y_train, y_val, y_test):
    """
    Prepare data and labels as input to the model.

    A trailing 1-dim channel axis is appended to the three input arrays; the
    label arrays are left untouched. All six arrays are then handed to
    helpers.arrays_to_cupy (if self.use_gpu is set) or to
    helpers.arrays_to_numpy (otherwise).

    Returns
    -------
    the converted arrays, in the order
    (x_train, x_val, x_test, y_train, y_val, y_test)
    """
    # indexing with None appends the additional 1-dim channel axis
    inputs = tuple(x[..., None] for x in (x_train, x_val, x_test))
    arrays = inputs + (y_train, y_val, y_test)

    if not self.use_gpu:
        # CPU execution: make sure the data is available to the CPU
        return helpers.arrays_to_numpy(*arrays)

    # GPU execution is desired/possible: move the data to the GPU
    return helpers.arrays_to_cupy(*arrays)
def preprocess_data(self, x_train, x_val, x_test, y_train, y_val, y_test):
    """
    Prepare data and labels as input to the model.

    Each input array is passed through self.input_shape_processor.forward
    (the original comments describe this as converting 2d data to 1d data),
    after which a 1-dim spatial axis and a 1-dim channel axis are appended.
    The label arrays are left untouched. All six arrays are then handed to
    helpers.arrays_to_cupy (if self.use_gpu is set) or to
    helpers.arrays_to_numpy (otherwise).

    Returns
    -------
    the converted arrays, in the order
    (x_train, x_val, x_test, y_train, y_val, y_test)
    """
    to_model_shape = self.input_shape_processor.forward
    # [..., None, None] appends the two additional 1-dim axes
    inputs = tuple(
        to_model_shape(x)[..., None, None] for x in (x_train, x_val, x_test))
    arrays = inputs + (y_train, y_val, y_test)

    if not self.use_gpu:
        # CPU execution: make sure the data is available to the CPU
        return helpers.arrays_to_numpy(*arrays)

    # GPU execution is desired/possible: move the data to the GPU
    return helpers.arrays_to_cupy(*arrays)
def _partition_split_indices(S, split_index):
    """
    Derive the sample indices of the train/validation/test partitions of one split.

    Split `split_index` serves as test split, its cyclic successor as validation
    split, and the samples of all remaining splits are merged into the training set.

    Returns
    -------
    (i_train, i_val, i_test) : the training indices as one flat list, followed by
        the validation and test entries of S.
    """
    j_test = split_index
    j_val = (split_index + 1) % len(S)

    i_train = []
    # sorted: fixes the order in which the training splits are concatenated
    for j in sorted(set(range(len(S))) - {j_test, j_val}):
        i_train.extend(S[j])

    return i_train, S[j_val], S[j_test]


def _run_decision_tree_baseline(x_train, x_val, x_test, y_train, y_val, y_test,
                                model_path, model_class_name, logfile):
    """
    Train and evaluate a decision tree on the (already preprocessed) data of one split.

    Writes outputs_dtree.mat and scores_dtree.txt into model_path, in parallel to the
    outputs.mat and scores.txt of the target model, and appends the report to logfile.
    """
    print('Training and evaluating alternative decision tree model')
    t_start = time.time()

    # make sure all data lives in CPU space for the DT model
    x_train, x_val, x_test, y_train, y_val, y_test = helpers.arrays_to_numpy(
        x_train, x_val, x_test, y_train, y_val, y_test)

    random_state = 42

    # prep data for DT models: one flat feature vector per sample
    x_train_dt = np.reshape(x_train, [x_train.shape[0], -1])
    x_val_dt = np.reshape(x_val, [x_val.shape[0], -1])
    x_test_dt = np.reshape(x_test, [x_test.shape[0], -1])

    # some models (SVM) flatten y_train to label indices. reinstate the
    # binary n_samples x n_classes matrix in that case.
    if len(y_train.shape) == 1:
        tmp = np.zeros((y_train.shape[0], y_val.shape[1]))
        tmp[np.arange(y_train.shape[0]), y_train] = 1
        y_train = tmp

    clf = tree.DecisionTreeClassifier(random_state=random_state)
    clf.fit(x_train_dt, y_train)

    y_pred_train = clf.predict(x_train_dt)
    acc_train = helpers.accuracy(y_pred_train, y_train)
    y_pred_val = clf.predict(x_val_dt)
    acc_val = helpers.accuracy(y_pred_val, y_val)
    y_pred_test = clf.predict(x_test_dt)
    acc_test = helpers.accuracy(y_pred_test, y_test)
    importances = clf.feature_importances_

    # collect results
    dtree_results = {
        'acc_train': acc_train,
        'acc_test': acc_test,
        'acc_val': acc_val,
        'y_pred_train': y_pred_train,
        'y_pred_test': y_pred_test,
        'y_pred_val': y_pred_val,
        'importances': importances
    }

    t_end = time.time()

    # save results in file, in parallel to outputs.mat for the target model
    scipy.io.savemat('{}/outputs_dtree.mat'.format(model_path), dtree_results)

    # write report for terminal printing. only test_accuracy (ie the first line
    # after the header) will be parsed by eval_score_logs
    dtree_report = '\n{}\n'.format(
        model_path.replace('/', ' ').replace(
            model_class_name, 'comp.DTree:{}'.format(model_class_name)))
    dtree_report += 'test accuracy : {}\n'.format(dtree_results['acc_test'])
    dtree_report += 'val accuracy : {}\n'.format(dtree_results['acc_val'])
    dtree_report += 'train accuracy : {}\n'.format(dtree_results['acc_train'])
    dtree_report += 'train-evaluation-sequence done after {}s\n\n'.format(
        t_end - t_start)
    print(dtree_report)

    # dump results to output of this run,
    # again in parallel to scores.txt for the target model
    with open('{}/scores_dtree.txt'.format(model_path), 'w') as f:
        f.write(dtree_report)

    # also write dtree report into logfile
    logfile.write(dtree_report)
    logfile.flush()


def run_train_test_cycle(X, Y, L, LS, S, P, model_class, output_root_dir,
                         data_name, target_name, training_programme=None,
                         do_this_if_model_exists='skip',
                         save_data_in_output_dir=True,
                         force_device_for_training=None,
                         force_device_for_evaluation=None,
                         do_xval=True, decision_tree=False):
    """
    Train and evaluate a model using the given data X, Y over all splits as determined in S.

    Parameters:
    -----------

    X : np.ndarray - shaped (N, T, C), where N is the number of samples, T is the number of
        time points in the data and C is the number of channels per time point.

    Y : np.ndarray - shaped (N, L), where N is the number of samples and L is the number of
        classes/labels

    L : list - a list of channel labels of length C, where C is the number of channels in the
        data. L holds textual descriptions of the data's channels

    LS : np.ndarray - shaped (N, S), where N is the number of samples and S is the number of
        existing subjects. Identifies the subject belonging to each datum X. Should run in
        parallel to the training labels Y

    S : list of lists - contains indices determining the partitioning of the data. The outer
        list groups the splits (ie len(S) groups of data) and each element of S contains the
        sample indices belonging to that split.

    P : np.ndarray - shaped (N,), describing the permutation applied to the input data X and
        the target labels Y. This allows referencing LS to Y and X.

    model_class : model_db.Model - a CLASS providing a set of required functions and the model
        architecture for executing the training and evaluation loop. It is instantiated once
        per split as model_class(output_root_dir, data_name, target_name, split_index).

    output_root_dir : str - the root folder for writing results into.

    data_name : str - what is the data/feature type called? e.g. GRF or JA_X_Lower, ...

    target_name : str - what is the prediction target called? e.g. Subject, Gender or Injury, ...

    training_programme : (optional) ModelTraining class - if not None, the model's default
        training regime is overwritten with the passed class' train_model() function

    do_this_if_model_exists : str - controls the training/evaluation behaviour if a trained
        model already exists at the model output location. options:
            retrain (do everything from scratch)
            load (load model and skip training, perform evaluation only)
            skip (completely skip, do nothing)

    save_data_in_output_dir : bool - controls whether to save the experimental data
        (X, Y, L, LS, S, P) in the output directory

    force_device_for_training : str - either gpu or cpu. force the use of this device during
        training.

    force_device_for_evaluation : str - either gpu or cpu. force the use of this device during
        evaluation. here, the use of the GPU is almost always recommended due to the large
        batch size to be processed.

    do_xval : bool - controls whether all data splits are run through a cross-evaluation
        scheme, or only data splits 0-2 are to be treated as dedicated training, validation
        and test splits

    decision_tree : bool - if True, additionally trains a decision tree model as a
        baseline/comparison option for the target model

    Returns:
    --------

    None. All results are written to files below output_root_dir.
    """

    # some basic sanity checks
    # (the first message reports Y.shape[0]; L is a list and has no shape attribute)
    assert Y.shape[0] == X.shape[0] == LS.shape[0], \
        'Number of samples differ between labels Y (n={}), data X (n={}) and subject labels LS (n={})'.format(
            Y.shape[0], X.shape[0], LS.shape[0])
    assert len(L) == X.shape[2], \
        'Number of provided channel names/labels in L (c={}) differs from number of channels in data X(c={})'.format(
            len(L), X.shape[2])
    assert sum([len(s) for s in S]) == X.shape[0], \
        'Number of samples distributed over splits in S (n={}) differs from number of samples in X ({})'.format(
            sum([len(s) for s in S]), X.shape[0])

    # the session log below is always written into the output root, so make
    # sure the directory is there even if the data itself is not saved.
    helpers.ensure_dir_exists(output_root_dir)

    # save data, labels and split information in output directory.
    if save_data_in_output_dir:
        print('Saving training and evaluation data to {}'.format(
            output_root_dir))
        scipy.io.savemat('{}/data.mat'.format(output_root_dir), {'X': X})
        scipy.io.savemat('{}/targets.mat'.format(output_root_dir), {'Y': Y})
        scipy.io.savemat('{}/channel_labels.mat'.format(output_root_dir), {'L': L})
        scipy.io.savemat('{}/subject_labels.mat'.format(output_root_dir), {'LS': LS})
        scipy.io.savemat('{}/splits.mat'.format(output_root_dir), {'S': S})
        scipy.io.savemat('{}/permutation.mat'.format(output_root_dir), {'P': P})

    # prepare log to append anything happening in this session. kinda deprecated.
    # the context manager closes the file on every exit path, including errors.
    with open('{}/log.txt'.format(output_root_dir), 'a') as logfile:

        # start main loop and execute training/evaluation for all the splits defined in S
        for split_index in range(len(S)):
            if split_index > 0 and not do_xval:
                cprint(
                    colored(
                        'Cross-Validation has been disabled. Terminating after first iteration.',
                        'yellow'))
                # terminate here after one iteration, e.g. in case predetermined splits have been given.
                break

            model = model_class(output_root_dir, data_name, target_name,
                                split_index)
            model_dir = model.path_dir()
            helpers.ensure_dir_exists(model_dir)

            # state of the model on disk before anything is trained in this iteration
            model_exists = model.exists()

            # this case: do nothing.
            if model_exists and do_this_if_model_exists == 'skip':
                print('Model already exists at {}. skipping'.format(model_dir))
                continue

            # other cases: split data in any case. measure time.
            t_start = time.time()

            # collect data indices from split table, then the data itself
            i_train, i_val, i_test = _partition_split_indices(S, split_index)
            x_train = X[i_train, ...]
            y_train = Y[i_train, ...]
            x_test = X[i_test, ...]
            y_test = Y[i_test, ...]
            x_val = X[i_val, ...]
            y_val = Y[i_val, ...]

            # model-specific data processing
            x_train, x_val, x_test, y_train, y_val, y_test = \
                model.preprocess_data(x_train, x_val, x_test, y_train, y_val, y_test)

            if not model_exists or do_this_if_model_exists == 'retrain':
                model.build_model(x_train.shape, y_train.shape)
                if training_programme is not None:
                    # this instance-based monkey-patching is not the best way to do it,
                    # but probably the most flexible one.
                    model.train_model = types.MethodType(
                        training_programme.train_model, model)
                model.train_model(x_train, y_train, x_val, y_val,
                                  force_device=force_device_for_training)
                model.save_model()
            else:
                model.load_model()

            # compute test scores and relevance maps for model.
            # the data bounds handed over are computed from the training data.
            results = model.evaluate_model(
                x_test, y_test,
                force_device=force_device_for_evaluation,
                lower_upper=helpers.get_channel_wise_bounds(x_train))

            # measure time for training/evaluation cycle
            t_end = time.time()

            model_path = model.path_dir()

            # write report for terminal printing
            report = '\n{}\n'.format(model_path.replace('/', ' '))
            report += 'test accuracy : {}\n'.format(results['acc'])
            report += 'test loss (l1): {}\n'.format(results['loss_l1'])
            report += 'train-evaluation-sequence done after {}s\n\n'.format(
                t_end - t_start)
            print(report)

            # dump results to output of this run
            with open('{}/scores.txt'.format(model_path), 'w') as f:
                f.write(report)

            # also write results to parsable log file for eval_score_logs module
            logfile.write(report)
            logfile.flush()

            # dump evaluation results to mat file
            scipy.io.savemat('{}/outputs.mat'.format(model_path), results)

            if decision_tree:
                # DTree training and evaluation currently limited to settings
                # where also the target model is trained or evaluated.
                _run_decision_tree_baseline(
                    x_train, x_val, x_test, y_train, y_val, y_test,
                    model_path, model_class.__name__, logfile)
def postprocess_relevance(self, *args, **kwargs):
    """
    Post-process relevance maps.

    The positional arguments are handed to helpers.arrays_to_numpy and the
    result is returned as is. Keyword arguments are accepted but ignored.
    """
    return helpers.arrays_to_numpy(*args)
def postprocess_relevance(self, *args, **kwargs):
    """
    Postprocess relevance values by bringing them into a shape aligned to the data.

    This implementation only hands the positional arguments to
    helpers.arrays_to_numpy; keyword arguments are accepted but ignored.
    """
    relevance = helpers.arrays_to_numpy(*args)
    return relevance
def evaluate_model(self, x_test, y_test, force_device=None, lower_upper=None):
    """
    Test the model and compute relevance maps.

    NOTE: as a side effect, the softmax output layer is dropped from
    self.model (via drop_softmax_output_layer) and the LRP parameters of its
    modules are reconfigured several times.

    Parameters:
    -----------

    x_test: array - shaped such that it is ready for consumption by the model

    y_test: array - expected test labels. assumed to be a binary matrix.

    force_device: str - (optional) force execution of the evaluation either on cpu or gpu.
        accepted values: "cpu", "gpu" respectively. None does nothing.

    lower_upper: (array of float, array of float) - (optional): lower and upper bounds of the
        inputs, for LRP_zB. inferred from x_test if None (or empty).
        arrays should match the feature dimensionality of the inputs, including broadcastable
        axes. e.g. if x_test is shaped (N, featuredims), then the bounds should be shaped
        (1, featuredims)

    Returns:
    --------

    results, packed in dictionary, as numpy arrays. Holds the keys 'acc', 'loss_l1' and
    'y_pred', plus one 'R_pred_act_<rule>' and one 'R_pred_dom_<rule>' relevance entry per
    evaluated LRP rule (for the actual and the dominantly predicted class, respectively).
    The composite rules are only evaluated if the model contains Convolution modules.
    """

    # self.model is required to be a modules.Sequential
    assert isinstance(
        self.model, Sequential
    ), "self.model should be modules.sequential.Sequentialm but is {}. ensure correct type by converting model after training.".format(
        type(self.model))

    results = {}  # prepare results dictionary

    # force model to specific device, if so desired.
    x_test, y_test = helpers.force_device(self, (x_test, y_test), force_device)

    print('...forward pass for {} test samples for model performance eval'.
          format(x_test.shape[0]))
    y_pred = self.model.forward(x_test)

    # evaluate accuracy and loss on cpu-copies of prediction vectors
    y_pred_c, y_test_c = helpers.arrays_to_numpy(y_pred, y_test)
    results['acc'] = helpers.accuracy(y_test_c, y_pred_c)
    results['loss_l1'] = helpers.l1loss(y_test_c, y_pred_c)
    results['y_pred'] = y_pred_c

    # NOTE: drop softmax layer AFTER forward for performance measures to obtain
    # competitive loss values. removing the softmax does not change the ranking
    # of the outputs but is required for most LRP methods.
    self.model.drop_softmax_output_layer()

    # NOTE: second forward pass without softmax for relevance computation.
    # this is also a requirement for LRP
    print('...forward pass for {} test samples (without softmax) for LRP'.
          format(x_test.shape[0]))
    y_pred = self.model.forward(x_test)

    # prepare initial relevance vectors for actual class and dominantly
    # predicted class, on model-device (gpu or cpu)
    R_init_act = y_pred * y_test  # assumes y_test to be binary matrix
    y_dom = (y_pred == y_pred.max(axis=1, keepdims=True))
    R_init_dom = y_pred * y_dom  # assumes prediction maxima are unique per sample

    # NOTE: the helpers below read self.model.modules anew on every call,
    # exactly as the explicit loops they replace did.

    def run_lrp(description, key_suffix):
        # one backward pass each for actual and dominant classes, under the
        # currently configured rules
        print('...lrp ({}) for actual classes'.format(description))
        results['R_pred_act_' + key_suffix] = self.model.lrp(R_init_act)
        print('...lrp ({}) for dominant classes'.format(description))
        results['R_pred_dom_' + key_suffix] = self.model.lrp(R_init_dom)

    def configure_lowest(layer_types, **lrp_parameters):
        # reconfigure only the first module of the given type(s), ie the one
        # nearest to the input
        for module in self.model.modules:
            if isinstance(module, layer_types):
                module.set_lrp_parameters(**lrp_parameters)
                break

    def configure_convolutions_alpha(alpha):
        # switch every convolution module to the alpha-beta rule
        for module in self.model.modules:
            if isinstance(module, Convolution):
                module.set_lrp_parameters(lrp_var='alpha', param=alpha)

    # compute epsilon-lrp for all model layers
    for m in self.model.modules:
        m.set_lrp_parameters(lrp_var='epsilon', param=1e-5)
    run_lrp('eps', 'epsilon')

    # eps + zB (lowest convolution/flatten layer) for all models here.
    # infer lower and upper bounds from data, if not given.
    # explicit None/empty check: avoids evaluating the truth value of an array-like
    if lower_upper is None or len(lower_upper) == 0:
        print(
            ' ...inferring per-channel lower and upper bounds for zB from test data. THIS IS PROBABLY NOT OPTIMAL'
        )
        lower_upper = helpers.get_channel_wise_bounds(x_test)
    else:
        print(' ...using input lower and upper bounds for zB')

    # make sure the bounds live on the same device as the model
    if self.use_gpu:
        lower_upper = helpers.arrays_to_cupy(*lower_upper)
    else:
        lower_upper = helpers.arrays_to_numpy(*lower_upper)

    # configure the lowest weighted layer to be decomposed with zB. This should
    # be the one nearest to the input. We are not just taking the first layer,
    # since the MLP models are starting with a Flatten layer for reshaping the data.
    configure_lowest((Linear, Convolution), lrp_var='zB', param=lower_upper)
    run_lrp('eps + zB', 'epsilon_zb')

    # compute CNN composite rules, if model has convolution layers
    has_convolutions = any(
        isinstance(m, Convolution) for m in self.model.modules)

    if has_convolutions:
        # epsilon-lrp with flat decomposition in the lowest convolution layer.
        # for "normal" cnns, this should overwrite the previously set zB rule
        configure_lowest(Convolution, lrp_var='flat')
        run_lrp('eps+flat', 'epsilon_flat')

        # the same three variants are evaluated for alpha2beta-1 and alpha1beta0
        for alpha, tag in ((2.0, 'alpha=2'), (1.0, 'alpha=1')):
            key = 'composite_' + tag.replace('=', '')

            # alpha-beta for all convolution layers
            configure_convolutions_alpha(alpha)
            run_lrp('composite:' + tag, key)

            # process lowest convolution layer with FLAT lrp
            configure_lowest(Convolution, lrp_var='flat')
            run_lrp('composite:' + tag + '+flat', key + '_flat')

            # process lowest convolution layer with zB lrp
            configure_lowest(Convolution, lrp_var='zB', param=lower_upper)
            run_lrp('composite:' + tag + '+zB', key + '_zB')

    print('...copying collected results to CPU and reshaping if necessary')
    for key in list(results):
        tmp = helpers.arrays_to_numpy(results[key])[0]
        if key.startswith('R'):
            # relevance maps additionally get the model-specific post-processing
            tmp = self.postprocess_relevance(tmp)[0]
        results[key] = tmp

    return results
def postprocess_relevance(self, *args):
    """
    Remove the trailing dummy axis from relevance maps.

    Each array is first passed through helpers.arrays_to_numpy. The trailing
    dummy axis is then selected explicitly (index 0) to remove it.

    Returns
    -------
    tuple with one post-processed relevance array per positional argument.
    """
    cpu_maps = helpers.arrays_to_numpy(*args)
    # r[..., 0] explicitly picks the previously added dummy axis
    return tuple(r[..., 0] for r in cpu_maps)