def train_model(self, x_train, y_train, x_val, y_val, force_device=None):
    print('training {} model (3 epochs, default setting)'.format(
        self.__class__.__name__))
    x_train, y_train, x_val, y_val = helpers.force_device(
        self, (x_train, y_train, x_val, y_val), force_device)

    # train the model
    self.model.train(x_train, y_train, Xval=x_val, Yval=y_val,
                     batchsize=5, lrate=0.005, status=500)

    # slower training once the model has converged somewhat
    self.model.train(x_train, y_train, Xval=x_val, Yval=y_val,
                     batchsize=5, lrate=0.001, status=500)

    # one last epoch
    self.model.train(x_train, y_train, Xval=x_val, Yval=y_val,
                     batchsize=5, lrate=0.0005, status=500)

def train_model(self, x_train, y_train, x_val, y_val, force_device=None):
    print('training {} model (4 epochs, decreasing batch size per epoch)'.format(
        self.__class__.__name__))
    x_train, y_train, x_val, y_val = helpers.force_device(
        self, (x_train, y_train, x_val, y_val), force_device)

    # train the model
    self.model.train(x_train, y_train, Xval=x_val, Yval=y_val,
                     batchsize=128, lrate=0.005, status=1000,
                     iters=50000, convergence=10)

    # slower training once the model has converged somewhat
    self.model.train(x_train, y_train, Xval=x_val, Yval=y_val,
                     batchsize=64, lrate=0.001, status=1000,
                     iters=50000, convergence=10)

    # one more epoch with a smaller batch size
    self.model.train(x_train, y_train, Xval=x_val, Yval=y_val,
                     batchsize=32, lrate=0.0005, status=1000,
                     iters=50000, convergence=10)

    # one final epoch at the smallest batch size
    self.model.train(x_train, y_train, Xval=x_val, Yval=y_val,
                     batchsize=8, lrate=0.0005, status=1000,
                     iters=50000, convergence=10)

def train_model(self, x_train, y_train, x_val, y_val, force_device=None):
    x_train, y_train, x_val, y_val = helpers.force_device(
        self, (x_train, y_train, x_val, y_val), force_device)
    print('training {} model (quick test)'.format(self.__class__.__name__))
    self.model.train(x_train, y_train, iters=10)

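# A minimal usage sketch for the training routines above. The wrapper class
# name (MnistMLP) and the data loading step are assumptions for illustration
# only; any class exposing self.model and one of these train_model() variants
# would be driven the same way:
#
#   wrapper = MnistMLP()
#   x_train, y_train, x_val, y_val = load_data()   # hypothetical helper
#   wrapper.train_model(x_train, y_train, x_val, y_val, force_device='gpu')
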
def evaluate_model(self, x_test, y_test, force_device=None, lower_upper=None):
    """
    Tests the model and computes relevance maps.

    Parameters:
    -----------

    x_test: array - shaped such that it is ready for consumption by the model

    y_test: array - expected test labels

    force_device: str - (optional) force execution of the evaluation either on cpu or gpu.
        accepted values: "cpu", "gpu" respectively. None does nothing.

    lower_upper: (array of float, array of float) - (optional) lower and upper bounds of the
        inputs, for LRP_zB. automagically inferred from x_test if not given. arrays should
        match the feature dimensionality of the inputs, including broadcastable axes,
        e.g. if x_test is shaped (N, featuredims), then the bounds should be shaped
        (1, featuredims).

    Returns:
    --------

    results, packed in a dictionary, as numpy arrays
    """

    assert isinstance(self.model, Sequential), \
        "self.model should be modules.sequential.Sequential but is {}. ensure correct type by converting model after training.".format(type(self.model))

    # remove the softmax output of the model.
    # this does not change the ranking of the outputs but is required for most LRP methods.
    # self.model is required to be a modules.Sequential
    results = {}  # prepare results dictionary

    # force model to specific device, if so desired.
    x_test, y_test = helpers.force_device(self, (x_test, y_test), force_device)

    print('...forward pass for {} test samples for model performance eval'.format(x_test.shape[0]))
    y_pred = self.model.forward(x_test)

    # evaluate accuracy and loss on cpu-copies of the prediction vectors
    y_pred_c, y_test_c = helpers.arrays_to_numpy(y_pred, y_test)
    results['acc'] = helpers.accuracy(y_test_c, y_pred_c)
    results['loss_l1'] = helpers.l1loss(y_test_c, y_pred_c)
    results['y_pred'] = y_pred_c

    # NOTE: drop the softmax layer AFTER the forward pass for performance measures to obtain competitive loss values
    self.model.drop_softmax_output_layer()

    # NOTE: second forward pass without softmax, which is also a requirement for LRP
    print('...forward pass for {} test samples (without softmax) for LRP'.format(x_test.shape[0]))
    y_pred = self.model.forward(x_test)

    # prepare initial relevance vectors for the actual class and the dominantly predicted class,
    # on the model's device (gpu or cpu)
    R_init_act = y_pred * y_test  # assumes y_test to be a binary matrix
    y_dom = (y_pred == y_pred.max(axis=1, keepdims=True))
    R_init_dom = y_pred * y_dom  # assumes prediction maxima are unique per sample

    # compute epsilon-lrp for all model layers
    for m in self.model.modules:
        m.set_lrp_parameters(lrp_var='epsilon', param=1e-5)
    print('...lrp (eps) for actual classes')
    results['R_pred_act_epsilon'] = self.model.lrp(R_init_act)
    print('...lrp (eps) for dominant classes')
    results['R_pred_dom_epsilon'] = self.model.lrp(R_init_dom)

    # eps + zB (lowest convolution/flatten layer) for all models here.
    # infer lower and upper bounds from the data, if not given
    if not lower_upper:
        print(' ...inferring per-channel lower and upper bounds for zB from test data. THIS IS PROBABLY NOT OPTIMAL')
        lower_upper = helpers.get_channel_wise_bounds(x_test)
    else:
        print(' ...using input lower and upper bounds for zB')
    if self.use_gpu:
        lower_upper = helpers.arrays_to_cupy(*lower_upper)
    else:
        lower_upper = helpers.arrays_to_numpy(*lower_upper)

    # configure the lowest weighted layer to be decomposed with zB. This should be the one nearest to the input.
    # We are not just taking the first layer, since the MLP models start with a Flatten layer for reshaping the data.
    for m in self.model.modules:
        if isinstance(m, (Linear, Convolution)):
            m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
            break

    print('...lrp (eps + zB) for actual classes')
    results['R_pred_act_epsilon_zb'] = self.model.lrp(R_init_act)
    print('...lrp (eps + zB) for dominant classes')
    results['R_pred_dom_epsilon_zb'] = self.model.lrp(R_init_dom)

    # compute CNN composite rules, if the model has convolution layers
    has_convolutions = False
    for m in self.model.modules:
        has_convolutions = has_convolutions or isinstance(m, Convolution)

    if has_convolutions:
        # convolution layers found.
        # epsilon-lrp with flat decomposition in the lowest convolution layer:
        # process the lowest convolution layer with FLAT lrp.
        # for "normal" cnns, this should overwrite the previously set zB rule
        for m in self.model.modules:
            if isinstance(m, Convolution):
                m.set_lrp_parameters(lrp_var='flat')
                break

        print('...lrp (eps+flat) for actual classes')
        results['R_pred_act_epsilon_flat'] = self.model.lrp(R_init_act)
        print('...lrp (eps+flat) for dominant classes')
        results['R_pred_dom_epsilon_flat'] = self.model.lrp(R_init_dom)

        # preparing alpha2beta1 for the convolution layers
        for m in self.model.modules:
            if isinstance(m, Convolution):
                m.set_lrp_parameters(lrp_var='alpha', param=2.0)

        print('...lrp (composite:alpha=2) for actual classes')
        results['R_pred_act_composite_alpha2'] = self.model.lrp(R_init_act)
        print('...lrp (composite:alpha=2) for dominant classes')
        results['R_pred_dom_composite_alpha2'] = self.model.lrp(R_init_dom)

        # process the lowest convolution layer with FLAT lrp
        for m in self.model.modules:
            if isinstance(m, Convolution):
                m.set_lrp_parameters(lrp_var='flat')
                break

        print('...lrp (composite:alpha=2+flat) for actual classes')
        results['R_pred_act_composite_alpha2_flat'] = self.model.lrp(R_init_act)
        print('...lrp (composite:alpha=2+flat) for dominant classes')
        results['R_pred_dom_composite_alpha2_flat'] = self.model.lrp(R_init_dom)

        # process the lowest convolution layer with zB lrp
        for m in self.model.modules:
            if isinstance(m, Convolution):
                m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                break

        print('...lrp (composite:alpha=2+zB) for actual classes')
        results['R_pred_act_composite_alpha2_zB'] = self.model.lrp(R_init_act)
        print('...lrp (composite:alpha=2+zB) for dominant classes')
        results['R_pred_dom_composite_alpha2_zB'] = self.model.lrp(R_init_dom)

        # switching to alpha1beta0 for those layers
        for m in self.model.modules:
            if isinstance(m, Convolution):
                m.set_lrp_parameters(lrp_var='alpha', param=1.0)

        print('...lrp (composite:alpha=1) for actual classes')
        results['R_pred_act_composite_alpha1'] = self.model.lrp(R_init_act)
        print('...lrp (composite:alpha=1) for dominant classes')
        results['R_pred_dom_composite_alpha1'] = self.model.lrp(R_init_dom)

        # process the lowest convolution layer with FLAT lrp
        for m in self.model.modules:
            if isinstance(m, Convolution):
                m.set_lrp_parameters(lrp_var='flat')
                break

        print('...lrp (composite:alpha=1+flat) for actual classes')
        results['R_pred_act_composite_alpha1_flat'] = self.model.lrp(R_init_act)
        print('...lrp (composite:alpha=1+flat) for dominant classes')
        results['R_pred_dom_composite_alpha1_flat'] = self.model.lrp(R_init_dom)

        # process the lowest convolution layer with zB lrp
        for m in self.model.modules:
            if isinstance(m, Convolution):
                m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                break

        print('...lrp (composite:alpha=1+zB) for actual classes')
        results['R_pred_act_composite_alpha1_zB'] = self.model.lrp(R_init_act)
        print('...lrp (composite:alpha=1+zB) for dominant classes')
        results['R_pred_dom_composite_alpha1_zB'] = self.model.lrp(R_init_dom)

    print('...copying collected results to CPU and reshaping if necessary')
    for key in results.keys():
        tmp = helpers.arrays_to_numpy(results[key])[0]
        if key.startswith('R'):
            tmp = self.postprocess_relevance(tmp)[0]
        results[key] = tmp

    return results
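
# A minimal sketch of how evaluate_model() might be consumed, assuming a trained
# wrapper instance and numpy test arrays (the variable names and the bound
# construction below are assumptions, not part of this module). Per the docstring
# above, zB bounds should broadcast against the feature axes of x_test, e.g.
# shaped (1, featuredims) for inputs shaped (N, featuredims):
#
#   import numpy as np
#   lower_upper = (np.full((1,) + x_test.shape[1:], x_test.min()),
#                  np.full((1,) + x_test.shape[1:], x_test.max()))
#   results = wrapper.evaluate_model(x_test, y_test, lower_upper=lower_upper)
#   print('test accuracy: {:.4f}'.format(results['acc']))
#   R = results['R_pred_act_epsilon']   # per-sample relevance maps, as numpy arrays
#   assert R.shape[0] == x_test.shape[0]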