    def train_model(self, x_train, y_train, x_val, y_val, force_device=None):
        print('training {} model (3 epochs, default setting)'.format(
            self.__class__.__name__))
        x_train, y_train, x_val, y_val = helpers.force_device(
            self, (x_train, y_train, x_val, y_val), force_device)
        self.model.train(x_train,
                         y_train,
                         Xval=x_val,
                         Yval=y_val,
                         batchsize=5,
                         lrate=0.005,
                         status=500)  # train the model
        self.model.train(
            x_train,
            y_train,
            Xval=x_val,
            Yval=y_val,
            batchsize=5,
            lrate=0.001,
            status=500
        )  # slower training once the model has converged somewhat
        self.model.train(x_train,
                         y_train,
                         Xval=x_val,
                         Yval=y_val,
                         batchsize=5,
                         lrate=0.0005,
                         status=500)  # one last epoch
    def train_model(self, x_train, y_train, x_val, y_val, force_device=None):
        print('training {} model (4 epochs, decreasing batch size per epoch)'.
              format(self.__class__.__name__))
        x_train, y_train, x_val, y_val = helpers.force_device(
            self, (x_train, y_train, x_val, y_val), force_device)
        self.model.train(x_train,
                         y_train,
                         Xval=x_val,
                         Yval=y_val,
                         batchsize=128,
                         lrate=0.005,
                         status=1000,
                         iters=50000,
                         convergence=10)  # train the model
        self.model.train(
            x_train,
            y_train,
            Xval=x_val,
            Yval=y_val,
            batchsize=64,
            lrate=0.001,
            status=1000,
            iters=50000,
            convergence=10
        )  # slower training once the model has converged somewhat
        self.model.train(x_train,
                         y_train,
                         Xval=x_val,
                         Yval=y_val,
                         batchsize=32,
                         lrate=0.0005,
                         status=1000,
                         iters=50000,
                         convergence=10)  # one last epoch
        self.model.train(x_train,
                         y_train,
                         Xval=x_val,
                         Yval=y_val,
                         batchsize=8,
                         lrate=0.0005,
                         status=1000,
                         iters=50000,
                         convergence=10)  # one final epoch with an even smaller batch size
    def train_model(self, x_train, y_train, x_val, y_val, force_device=None):
        x_train, y_train, x_val, y_val = helpers.force_device(
            self, (x_train, y_train, x_val, y_val), force_device)
        print('training {} model (quick test)'.format(self.__class__.__name__))
        self.model.train(x_train, y_train, iters=10)
    def evaluate_model(self,
                       x_test,
                       y_test,
                       force_device=None,
                       lower_upper=None):
        """
        test model and computes relevance maps

        Parameters:
        -----------

        x_test: array - shaped such that it is ready for consumption by the model

        y_test: array - expected test labels

        target_shape: list or tuple - the target output shape of the test data and relevance maps.

        force_device: str - (optional) force execution of the evaluation either on cpu or gpu.
            accepted values: "cpu", "gpu" respectively. None does nothing.

        lower_upper: (array of float, array of float) - (optional): lower and upper bounds of the inputs, for LRP_zB.
            automagically inferred from x_test.
            arrays should match the feature dimensionality of the inputs, including broadcastable axes.
            e.g. if x_test is shaped (N, featuredims), then the bounds should be shaped (1, featuredims)

        Returns:
        --------

        results, packed in dictionary, as numpy arrays
        """

        assert isinstance(
            self.model, Sequential
        ), "self.model should be modules.sequential.Sequential but is {}. ensure the correct type by converting the model after training.".format(
            type(self.model))
        # the softmax output of the model will be removed further below, after the performance evaluation.
        # this does not change the ranking of the outputs but is required for most LRP methods.
        # self.model is required to be a modules.Sequential
        results = {}  # prepare results dictionary

        #force model to specific device, if so desired.
        x_test, y_test = helpers.force_device(self, (x_test, y_test),
                                              force_device)

        print('...forward pass for {} test samples for model performance eval'.
              format(x_test.shape[0]))
        y_pred = self.model.forward(x_test)

        # evaluate accuracy and loss on cpu copies of the prediction vectors
        y_pred_c, y_test_c = helpers.arrays_to_numpy(y_pred, y_test)
        results['acc'] = helpers.accuracy(y_test_c, y_pred_c)
        results['loss_l1'] = helpers.l1loss(y_test_c, y_pred_c)
        results['y_pred'] = y_pred_c

        #NOTE: drop the softmax layer AFTER the forward pass for the performance measures, to obtain competitive loss values
        self.model.drop_softmax_output_layer()

        #NOTE: second forward pass without softmax for relevance computation
        print('...forward pass for {} test samples (without softmax) for LRP'.
              format(x_test.shape[0]))
        y_pred = self.model.forward(
            x_test)  # this is also a requirement for LRP

        # prepare initial relevance vectors for actual class and dominantly predicted class, on model-device (gpu or cpu)
        R_init_act = y_pred * y_test  # assumes y_test to be a binary (one-hot) label matrix

        y_dom = (y_pred == y_pred.max(axis=1, keepdims=True))
        R_init_dom = y_pred * y_dom  #assumes prediction maxima are unique per sample
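        # e.g. for a sample with one-hot label [0, 1, 0] and model output [0.2, 0.7, 0.1],
        # R_init_act = [0, 0.7, 0] and R_init_dom = [0, 0.7, 0]; the two initializations
        # only differ for misclassified samples, where the dominant class is not the actual one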

        # compute epsilon-lrp for all model layers
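        # the epsilon rule stabilizes each decomposition step by adding a small, signed
        # epsilon to the denominator, roughly R_j = sum_k z_jk / (z_k + eps * sign(z_k)) * R_k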
        for m in self.model.modules:
            m.set_lrp_parameters(lrp_var='epsilon', param=1e-5)
        print('...lrp (eps) for actual classes')
        results['R_pred_act_epsilon'] = self.model.lrp(R_init_act)

        print('...lrp (eps) for dominant classes')
        results['R_pred_dom_epsilon'] = self.model.lrp(R_init_dom)

        # eps + zB (zB applied to the lowest linear or convolution layer) for all models here.
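        # the zB (box-constraint) rule incorporates the admissible input range [l, h] into the
        # first-layer decomposition, roughly R_i = sum_j (x_i w_ij - l_i w_ij^+ - h_i w_ij^-) / z_j * R_j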

        # infer lower and upper bounds from data, if not given
        if not lower_upper:
            print(
                '    ...inferring per-channel lower and upper bounds for zB from test data. THIS IS PROBABLY NOT OPTIMAL'
            )
            lower_upper = helpers.get_channel_wise_bounds(x_test)
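            # (presumably per-channel minima and maxima over x_test; bounds derived from the
            # test data may not reflect the true admissible input domain)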
        else:
            print('    ...using input lower and upper bounds for zB')
        if self.use_gpu:
            lower_upper = helpers.arrays_to_cupy(*lower_upper)
        else:
            lower_upper = helpers.arrays_to_numpy(*lower_upper)

        # configure the lowest weighted layer to be decomposed with zB. This should be the one nearest to the input.
        # We do not just take the first layer, since the MLP models start with a Flatten layer for reshaping the data.
        for m in self.model.modules:
            if isinstance(m, (Linear, Convolution)):
                m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                break

        print('...lrp (eps + zB) for actual classes')
        results['R_pred_act_epsilon_zb'] = self.model.lrp(R_init_act)

        print('...lrp (eps + zB) for dominant classes')
        results['R_pred_dom_epsilon_zb'] = self.model.lrp(R_init_dom)

        # compute CNN composite rules, if the model has convolution layers
        has_convolutions = False
        for m in self.model.modules:
            has_convolutions = has_convolutions or isinstance(m, Convolution)

        if has_convolutions:
            # convolution layers found.

            # epsilon-lrp with flat decomposition in the lowest convolution layers
            # process lowest convolution layer with FLAT lrp
            # for "normal" cnns, this should overwrite the previously set zB rule
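            # the flat rule ignores weights and activations in that layer and spreads each
            # neuron's relevance uniformly over its inputs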
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='flat')
                    break

            print('...lrp (eps+flat) for actual classes')
            results['R_pred_act_epsilon_flat'] = self.model.lrp(R_init_act)

            print('...lrp (eps+flat) for dominant classes')
            results['R_pred_dom_epsilon_flat'] = self.model.lrp(R_init_dom)

            # preparing alpha2beta-1 for those layers
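            # the alpha-beta rule weights positive and negative pre-activation contributions
            # separately with alpha and beta; judging from the "alpha2beta-1" naming, the
            # toolbox presumably sets beta = 1 - alpha, i.e. alpha=2 implies beta=-1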
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='alpha', param=2.0)

            print('...lrp (composite:alpha=2) for actual classes')
            results['R_pred_act_composite_alpha2'] = self.model.lrp(R_init_act)

            print('...lrp (composite:alpha=2) for dominant classes')
            results['R_pred_dom_composite_alpha2'] = self.model.lrp(R_init_dom)

            # process lowest convolution layer with FLAT lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='flat')
                    break

            print('...lrp (composite:alpha=2+flat) for actual classes')
            results['R_pred_act_composite_alpha2_flat'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=2+flat) for dominant classes')
            results['R_pred_dom_composite_alpha2_flat'] = self.model.lrp(
                R_init_dom)

            #process lowest convolution layer with zB lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                    break

            print('...lrp (composite:alpha=2+zB) for actual classes')
            results['R_pred_act_composite_alpha2_zB'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=2+zB) for dominant classes')
            results['R_pred_dom_composite_alpha2_zB'] = self.model.lrp(
                R_init_dom)

            # switching to alpha1beta0 for those layers
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='alpha', param=1.0)

            print('...lrp (composite:alpha=1) for actual classes')
            results['R_pred_act_composite_alpha1'] = self.model.lrp(R_init_act)

            print('...lrp (composite:alpha=1) for dominant classes')
            results['R_pred_dom_composite_alpha1'] = self.model.lrp(R_init_dom)

            # process lowest convolution layer with FLAT lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='flat')
                    break

            print('...lrp (composite:alpha=1+flat) for actual classes')
            results['R_pred_act_composite_alpha1_flat'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=1+flat) for dominant classes')
            results['R_pred_dom_composite_alpha1_flat'] = self.model.lrp(
                R_init_dom)

            #process lowest convolution layer with zB lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                    break

            print('...lrp (composite:alpha=1+zB) for actual classes')
            results['R_pred_act_composite_alpha1_zB'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=1+zB) for dominant classes')
            results['R_pred_dom_composite_alpha1_zB'] = self.model.lrp(
                R_init_dom)

        print('...copying collected results to CPU and reshaping if necessary')
        for key in results.keys():
            tmp = helpers.arrays_to_numpy(results[key])[0]
            if key.startswith('R'):
                tmp = self.postprocess_relevance(tmp)[0]
            results[key] = tmp

        return results
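
# A minimal usage sketch for the methods above, assuming a wrapper class exposing them.
# The names `ModelWrapper` and `load_data` are placeholders, not part of this module:
#
#   wrapper = ModelWrapper(use_gpu=False)                # hypothetical wrapper class
#   x_tr, y_tr, x_va, y_va, x_te, y_te = load_data()     # hypothetical data loading
#   wrapper.train_model(x_tr, y_tr, x_va, y_va)          # one of the training schedules above
#   results = wrapper.evaluate_model(x_te, y_te)         # requires self.model to be a modules.Sequential
#   print(results['acc'])                                # test set accuracy
#   heatmaps = results['R_pred_act_epsilon']             # epsilon-LRP maps for the actual classes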