Python arrays_to_numpy 예제들, helpers.arrays_to_numpy Python 예제들

예제 #1

0

파일 보기

파일: cnn.py 프로젝트: sebastian-lapuschkin/explaining-deep-gait-classification-gender

 def postprocess_relevance(self, *args):
     """
     convert the given relevance arrays via helpers.arrays_to_numpy and map
     each of them back through self.input_shape_processor.backward.

     returns a tuple holding one processed array per converted input.
     """
     restored = []
     for rel_map in helpers.arrays_to_numpy(*args):
         # index the two trailing axes at position 0 (the dummy axes added
         # during preprocessing, per the original note) so they are removed
         squeezed = rel_map[..., 0, 0]
         # then let the shape processor undo its forward transformation
         restored.append(self.input_shape_processor.backward(squeezed))
     return tuple(restored)

예제 #2

0

파일 보기

    def preprocess_data(self, x_train, x_val, x_test, y_train, y_val, y_test):
        """
        prepare data and labels as input to the model.
        the six arrays are handed, unchanged and in the given order, to the
        conversion helper matching the execution device; its result is returned.
        """
        if self.use_gpu:
            # GPU execution is desired/possible: move data to the GPU
            convert = helpers.arrays_to_cupy
        else:
            # otherwise, make sure the data is available to the CPU
            convert = helpers.arrays_to_numpy

        return convert(x_train, x_val, x_test, y_train, y_val, y_test)

예제 #3

0

파일 보기

파일: cnn.py 프로젝트: sebastian-lapuschkin/explaining-deep-gait-classification-gender

    def preprocess_data(self, x_train, x_val, x_test, y_train, y_val, y_test):
        """
        prepare data and labels as input to the model.
        every input array receives one additional trailing axis of length 1
        (the channel axis); the label arrays are passed on untouched.
        """

        # append the 1-dim channel axis to each of the three input arrays
        x_train_c = x_train[..., None]
        x_val_c = x_val[..., None]
        x_test_c = x_test[..., None]

        if self.use_gpu:
            # GPU execution is desired/possible: move data to the GPU
            convert = helpers.arrays_to_cupy
        else:
            # otherwise, make sure the data is available to the CPU
            convert = helpers.arrays_to_numpy

        return convert(x_train_c, x_val_c, x_test_c, y_train, y_val, y_test)

예제 #4

0

파일 보기

파일: cnn.py 프로젝트: sebastian-lapuschkin/explaining-deep-gait-classification-gender

    def preprocess_data(self, x_train, x_val, x_test, y_train, y_val, y_test):
        """
        prepare data and labels as input to the model.
        each input array is passed through self.input_shape_processor.forward
        and then extended by two trailing axes of length 1 (per the original
        note: one spatial axis and one channel axis). labels stay untouched.
        """

        # run the shape processor on each input, then append the two 1-dim axes
        x_train_p = self.input_shape_processor.forward(x_train)[..., None, None]
        x_val_p = self.input_shape_processor.forward(x_val)[..., None, None]
        x_test_p = self.input_shape_processor.forward(x_test)[..., None, None]

        if self.use_gpu:
            # GPU execution is desired/possible: move data to the GPU
            convert = helpers.arrays_to_cupy
        else:
            # otherwise, make sure the data is available to the CPU
            convert = helpers.arrays_to_numpy

        return convert(x_train_p, x_val_p, x_test_p, y_train, y_val, y_test)

예제 #5

0

파일 보기

def _run_decision_tree_baseline(model, model_class, x_train, x_val, x_test,
                                y_train, y_val, y_test, logfile):
    """
    Train and evaluate a decision tree as baseline/comparison for the target model.

    The results are saved as outputs_dtree.mat and scores_dtree.txt inside
    model.path_dir() (in parallel to the target model's outputs.mat and
    scores.txt), and the textual report is appended to the open logfile.
    """
    print('Training and evaluating alternative decision tree model')
    t_start = time.time()

    # make sure all data lives in CPU space for the DT model
    x_train, x_val, x_test, y_train, y_val, y_test =\
        helpers.arrays_to_numpy(x_train, x_val, x_test, y_train, y_val, y_test)

    random_state = 42
    # prep data for DT models: flatten everything but the sample axis
    x_train_dt = np.reshape(x_train, [x_train.shape[0], -1])
    x_val_dt = np.reshape(x_val, [x_val.shape[0], -1])
    x_test_dt = np.reshape(x_test, [x_test.shape[0], -1])

    # some models (e.g. SVM) flatten y_train to a label vector during
    # preprocessing. re-create the (n_samples x n_classes) indicator matrix here.
    if len(y_train.shape) == 1:
        tmp = np.zeros((y_train.shape[0],
                        y_val.shape[1]))  # n_samples x n_classes
        tmp[np.arange(y_train.shape[0]), y_train] = 1
        y_train = tmp

    clf = tree.DecisionTreeClassifier(random_state=random_state)
    clf.fit(x_train_dt, y_train)

    y_pred_train = clf.predict(x_train_dt)
    acc_train = helpers.accuracy(y_pred_train, y_train)

    y_pred_val = clf.predict(x_val_dt)
    acc_val = helpers.accuracy(y_pred_val, y_val)

    y_pred_test = clf.predict(x_test_dt)
    acc_test = helpers.accuracy(y_pred_test, y_test)

    importances = clf.feature_importances_

    # collect results
    dtree_results = {
        'acc_train': acc_train,
        'acc_test': acc_test,
        'acc_val': acc_val,
        'y_pred_train': y_pred_train,
        'y_pred_test': y_pred_test,
        'y_pred_val': y_pred_val,
        'importances': importances
    }

    t_end = time.time()

    # save results in file, in parallel to outputs.mat for the target model
    scipy.io.savemat('{}/outputs_dtree.mat'.format(model.path_dir()),
                     dtree_results)

    # write report for terminal printing. only test_accuracy (ie the first line
    # after the header) will be parsed by eval_score_logs
    dtree_report = '\n{}\n'.format(model.path_dir().replace(
        '/',
        ' ').replace(model_class.__name__,
                     'comp.DTree:{}'.format(model_class.__name__)))
    dtree_report += 'test accuracy : {}\n'.format(dtree_results['acc_test'])
    dtree_report += 'val accuracy : {}\n'.format(dtree_results['acc_val'])
    dtree_report += 'train accuracy : {}\n'.format(dtree_results['acc_train'])
    dtree_report += 'train-evaluation-sequence done after {}s\n\n'.format(
        t_end - t_start)
    print(dtree_report)

    # dump results to output of this run,
    # again in parallel to scores.txt for the target model
    with open('{}/scores_dtree.txt'.format(model.path_dir()), 'w') as f:
        f.write(dtree_report)

    # also write dtree report into logfile
    logfile.write(dtree_report)
    logfile.flush()


def run_train_test_cycle(X,
                         Y,
                         L,
                         LS,
                         S,
                         P,
                         model_class,
                         output_root_dir,
                         data_name,
                         target_name,
                         training_programme=None,
                         do_this_if_model_exists='skip',
                         save_data_in_output_dir=True,
                         force_device_for_training=None,
                         force_device_for_evaluation=None,
                         do_xval=True,
                         decision_tree=False):
    """
    This script trains and evaluates a model using the given data X,Y over all splits as determined in S

    Parameters:
    -----------

    X : np.ndarray - A numpy.ndarray shaped (N, T, C), where N is the number of samples, T is the number
        of time points in the data and C is the number of channels per time point.

    Y : np.ndarray - A numpy.ndarray shaped (N, L), where N is the number of samples and L is the number of classes/labels

    L : list - a list of channel labels of length C, where C is the number of channels in the data.
        L holds textual descriptions of the data's channels

    LS: np.array - A numpy.ndarray shaped (N, S), where N is the number of samples and S is the number of existing subjects.
        Identifies the subject belonging to each datum X.
        Should run in parallel to the training labels Y

    S : list of lists - Contains indices determining the partitioning of the data.
        The outer list groups the splits (ie len(S) groups of data) and each list element of S contains the sample indices of that split.

    P : np.ndarray - A numpy.ndarray shaped (N,) describing the permutation applied to the input data X and the target labels Y.
        This allows referencing LS to Y and X.

    model_class: model_db.Model - a CLASS providing a set of required functions and the model architecture for executing the training and evaluation loop

    output_root_dir: str - a string pointing towards the root folder for writing results into.

    data_name: str - what is the data/feature type called? e.g. GRF or JA_X_Lower, ...

    target_name: str - what is the prediction target called? e.g. Subject, Gender or Injury, ...

    training_programme: (optional) ModelTraining class - If this parameter is not None, the model's default training regime will be overwritten
        with the passed ModelTraining class' train_model() function

    do_this_if_model_exists: str - variable controlling the training/evaluation behaviour if a trained model already exists
        at the model output location. options:
        retrain (do everything from scratch)
        load (load model and skip training, perform evaluation only)
        skip (completely skip, do nothing)

    save_data_in_output_dir: bool - controls whether to save the experimental data (X, Y, L, LS, S, P) in the output directory

    force_device_for_training: str - values can be either gpu or cpu. force the use of this device during training.

    force_device_for_evaluation: str - values can be either gpu or cpu. force the use of this device during evaluation.
        here, the use of the GPU is almost always recommended due to the large batch size to be processed.

    do_xval: bool - controls whether all data splits are run through a cross-evaluation scheme, or only data splits 0-2 are to be treated as dedicated training, validation and test splits

    decision_tree: bool - if True trains a decision tree model as a baseline/comparison option for the target model

    Raises:
    -------

    AssertionError - if the sample counts of X, Y and LS differ, if len(L) does not match the
        channel axis of X, or if the indices in S do not add up to the number of samples in X.
    """

    # some basic sanity checks
    # NOTE: the message reports Y.shape[0]. L is a plain list of channel labels and has no .shape
    assert Y.shape[0] == X.shape[0] == LS.shape[
        0], 'Number of samples differ between labels Y (n={}), data X (n={}) and subject labels LS (n={})'.format(
            Y.shape[0], X.shape[0], LS.shape[0])
    assert len(L) == X.shape[
        2], 'Number of provided channel names/labels in L (c={}) differs from number of channels in data X(c={})'.format(
            len(L), X.shape[2])
    assert sum([len(s) for s in S]) == X.shape[
        0], 'Number of samples distributed over splits in S (n={}) differs from number of samples in X ({})'.format(
            sum([len(s) for s in S]), X.shape[0])

    # the session log is written to the output directory in any case,
    # so make sure it exists even if the data itself is not saved there.
    helpers.ensure_dir_exists(output_root_dir)

    # save data, labels and split information in output directory.
    if save_data_in_output_dir:
        print('Saving training and evaluation data to {}'.format(
            output_root_dir))
        scipy.io.savemat('{}/data.mat'.format(output_root_dir), {'X': X})
        scipy.io.savemat('{}/targets.mat'.format(output_root_dir), {'Y': Y})
        scipy.io.savemat('{}/channel_labels.mat'.format(output_root_dir),
                         {'L': L})
        scipy.io.savemat('{}/subject_labels.mat'.format(output_root_dir),
                         {'LS': LS})
        scipy.io.savemat('{}/splits.mat'.format(output_root_dir), {'S': S})
        scipy.io.savemat('{}/permutation.mat'.format(output_root_dir),
                         {'P': P})

    # prepare log to append anything happening in this session. kinda deprecated.
    # the context manager guarantees the file handle is closed, also on errors.
    with open('{}/log.txt'.format(output_root_dir), 'a') as logfile:

        # start main loop and execute training/evaluation for all the splits defined in S
        for split_index in range(len(S)):
            if split_index > 0 and not do_xval:
                cprint(
                    colored(
                        'Cross-Validation has been disabled. Terminating after first iteration.',
                        'yellow'))
                # terminate here after one iteration, e.g. in case predetermined splits have been given.
                break

            model = model_class(output_root_dir, data_name, target_name,
                                split_index)
            model_dir = model.path_dir()
            helpers.ensure_dir_exists(model_dir)

            # this case: do nothing.
            if model.exists() and do_this_if_model_exists == 'skip':
                print('Model already exists at {}. skipping'.format(model_dir))
                continue  # skip remaining code, there is nothing to be done. please move along.

            # other cases: split data in any case. measure time. set output log
            t_start = time.time()

            # collect data indices from split table:
            # split i is the test split, split i+1 (cyclic) the validation split,
            # all remaining splits together form the training set
            j_test = split_index
            i_test = S[j_test]
            j_val = (split_index + 1) % len(S)
            i_val = S[j_val]
            j_train = list(set(range(len(S))) - {j_test, j_val})
            i_train = []
            for j in j_train:
                i_train.extend(S[j])

            # collect data from indices
            x_train = X[i_train, ...]
            y_train = Y[i_train, ...]
            x_test = X[i_test, ...]
            y_test = Y[i_test, ...]
            x_val = X[i_val, ...]
            y_val = Y[i_val, ...]

            # remember shape of test data as originally given
            x_test_shape_orig = x_test.shape

            # model-specific data processing
            x_train, x_val, x_test, y_train, y_val, y_test =\
                model.preprocess_data(x_train, x_val, x_test, y_train, y_val, y_test)

            if not model.exists() or (model.exists()
                                      and do_this_if_model_exists == 'retrain'):
                model.build_model(x_train.shape, y_train.shape)
                if training_programme is not None:
                    # this instance-based monkey-patching is not the best way to do it, but probably the most flexible one.
                    model.train_model = types.MethodType(
                        training_programme.train_model, model)
                model.train_model(x_train,
                                  y_train,
                                  x_val,
                                  y_val,
                                  force_device=force_device_for_training)
                model.save_model()
            else:
                model.load_model()

            # compute test scores and relevance maps for model.
            results = model.evaluate_model(
                x_test,
                y_test,
                force_device=force_device_for_evaluation,
                lower_upper=helpers.get_channel_wise_bounds(x_train)
            )  # compute and give data bounds computed from training data.

            # measure time for training/evaluation cycle
            t_end = time.time()

            # write report for terminal printing
            report = '\n{}\n'.format(model.path_dir().replace('/', ' '))
            report += 'test accuracy : {}\n'.format(results['acc'])
            report += 'test loss (l1): {}\n'.format(results['loss_l1'])
            report += 'train-evaluation-sequence done after {}s\n\n'.format(
                t_end - t_start)
            print(report)

            # dump results to output of this run
            with open('{}/scores.txt'.format(model.path_dir()), 'w') as f:
                f.write(report)

            # also write results to parsable log file for eval_score_logs module
            logfile.write(report)
            logfile.flush()

            # dump evaluation results to mat file
            scipy.io.savemat('{}/outputs.mat'.format(model.path_dir()),
                             results)

            if decision_tree:
                # DTree training and evaluation currently limited to settings where also the target model is processed.
                _run_decision_tree_baseline(model, model_class, x_train,
                                            x_val, x_test, y_train, y_val,
                                            y_test, logfile)

예제 #6

0

파일 보기

파일: svm.py 프로젝트: sebastian-lapuschkin/explaining-deep-gait-classification-gender

 def postprocess_relevance(self, *args, **kwargs):
     """
     pass all positional arguments through helpers.arrays_to_numpy and
     return the result as is. keyword arguments are accepted but ignored.
     """
     return helpers.arrays_to_numpy(*args)

예제 #7

0

파일 보기

파일: base.py 프로젝트: sebastian-lapuschkin/explaining-deep-gait-classification-gender

 def postprocess_relevance(self, *args, **kwargs):
     """
     default relevance postprocessing: hand the positional arguments to
     helpers.arrays_to_numpy and return whatever it produces.
     keyword arguments are accepted but not used.
     """
     converted = helpers.arrays_to_numpy(*args)
     return converted

예제 #8

0

파일 보기

파일: base.py 프로젝트: sebastian-lapuschkin/explaining-deep-gait-classification-gender

    def evaluate_model(self,
                       x_test,
                       y_test,
                       force_device=None,
                       lower_upper=None):
        """
        test model and compute relevance maps for several LRP configurations.

        NOTE: this method alters self.model: the softmax output layer is dropped
        and the LRP parameters of the model's modules are overwritten repeatedly.

        Parameters:
        -----------

        x_test: array - shaped such that it is ready for consumption by the model

        y_test: array - expected test labels. used as a multiplicative mask on the
            model output, ie assumed to be a binary (sample x class) matrix.

        force_device: str - (optional) force execution of the evaluation either on cpu or gpu.
            accepted values: "cpu", "gpu" respectively. None does nothing.
            the value is handed on to helpers.force_device.

        lower_upper: (array of float, array of float) - (optional): lower and upper bounds of the inputs, for LRP_zB.
            if not given, the bounds are inferred from x_test via helpers.get_channel_wise_bounds.
            arrays should match the feature dimensionality of the inputs, including broadcastable axes.
            e.g. if x_test is shaped (N, featuredims), then the bounds should be shaped (1, featuredims)

        Returns:
        --------

        results, packed in dictionary, as numpy arrays. keys:
            'acc', 'loss_l1', 'y_pred' - performance measures and predictions (with softmax)
            'R_pred_{act,dom}_epsilon', 'R_pred_{act,dom}_epsilon_zb' - always computed
            'R_pred_{act,dom}_epsilon_flat' and
            'R_pred_{act,dom}_composite_alpha{1,2}[_flat|_zB]' - only if the model contains Convolution layers
            ('act': relevance for the actual class, 'dom': relevance for the dominantly predicted class)
        """

        assert isinstance(
            self.model, Sequential
        ), "self.model should be modules.sequential.Sequentialm but is {}. ensure correct type by converting model after training.".format(
            type(self.model))
        # self.model is required to be a modules.Sequential (checked above), since
        # the code below relies on its forward(), lrp(), modules and
        # drop_softmax_output_layer() members.
        results = {}  #prepare results dictionary

        #force model to specific device, if so desired.
        x_test, y_test = helpers.force_device(self, (x_test, y_test),
                                              force_device)

        print('...forward pass for {} test samples for model performance eval'.
              format(x_test.shape[0]))
        y_pred = self.model.forward(x_test)

        #evaluate accuracy and loss on cpu-copies of prediction vectors
        y_pred_c, y_test_c = helpers.arrays_to_numpy(y_pred, y_test)
        results['acc'] = helpers.accuracy(y_test_c, y_pred_c)
        results['loss_l1'] = helpers.l1loss(y_test_c, y_pred_c)
        results['y_pred'] = y_pred_c

        #NOTE: drop softmax layer AFTER forward for performance measures to obtain competitive loss values.
        # removing the softmax does not change the ranking of the outputs but is required for most LRP methods
        self.model.drop_softmax_output_layer()

        #NOTE: second forward pass without softmax for relevance computation
        print('...forward pass for {} test samples (without softmax) for LRP'.
              format(x_test.shape[0]))
        y_pred = self.model.forward(
            x_test)  # this is also a requirement for LRP

        # prepare initial relevance vectors for actual class and dominantly predicted class, on model-device (gpu or cpu)
        R_init_act = y_pred * y_test  #assumes y_test to be binary matrix

        # boolean mask selecting the per-sample maximum of the model output
        y_dom = (y_pred == y_pred.max(axis=1, keepdims=True))
        R_init_dom = y_pred * y_dom  #assumes prediction maxima are unique per sample

        # compute epsilon-lrp for all model layers
        for m in self.model.modules:
            m.set_lrp_parameters(lrp_var='epsilon', param=1e-5)
        print('...lrp (eps) for actual classes')
        results['R_pred_act_epsilon'] = self.model.lrp(R_init_act)

        print('...lrp (eps) for dominant classes')
        results['R_pred_dom_epsilon'] = self.model.lrp(R_init_dom)

        # eps + zB (lowest convolution/flatten layer) for all models here.

        # infer lower and upper bounds from data, if not given
        # NOTE(review): this is a truthiness test, not an "is None" check. it works for the
        # documented tuple of arrays, but a bare numpy array would presumably raise here - confirm callers.
        if not lower_upper:
            print(
                '    ...inferring per-channel lower and upper bounds for zB from test data. THIS IS PROBABLY NOT OPTIMAL'
            )
            lower_upper = helpers.get_channel_wise_bounds(x_test)
        else:
            print('    ...using input lower and upper bounds for zB')
        # make sure the bounds live on the same device as the model
        if self.use_gpu:
            lower_upper = helpers.arrays_to_cupy(*lower_upper)
        else:
            lower_upper = helpers.arrays_to_numpy(*lower_upper)

        # configure the lowest weighted layer to be decomposed with zB. This should be the one nearest to the input.
        # We are not just taking the first layer, since the MLP models are starting with a Flatten layer for reshaping the data.
        for m in self.model.modules:
            if isinstance(m, (Linear, Convolution)):
                m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                break

        print('...lrp (eps + zB) for actual classes')
        results['R_pred_act_epsilon_zb'] = self.model.lrp(R_init_act)

        print('...lrp (eps + zB) for dominant classes')
        results['R_pred_dom_epsilon_zb'] = self.model.lrp(R_init_dom)

        # compute CNN composite rules, if model has convolution layers
        has_convolutions = False
        for m in self.model.modules:
            has_convolutions = has_convolutions or isinstance(m, Convolution)

        if has_convolutions:
            # convolution layers found.
            # NOTE: the following steps build upon each other. each one only overwrites
            # the lrp parameters of some layers and keeps the remaining settings of the
            # previous step, so the order of the steps matters.

            # epsilon-lrp with flat decomposition in the lowest convolution layers
            # process lowest convolution layer with FLAT lrp
            # for "normal" cnns, this should overwrite the previously set zB rule
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='flat')
                    break

            print('...lrp (eps+flat) for actual classes')
            results['R_pred_act_epsilon_flat'] = self.model.lrp(R_init_act)

            print('...lrp (eps+flat) for dominant classes')
            results['R_pred_dom_epsilon_flat'] = self.model.lrp(R_init_dom)

            # preparing alpha2beta-1 for those layers (ALL convolution layers, no break here)
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='alpha', param=2.0)

            print('...lrp (composite:alpha=2) for actual classes')
            results['R_pred_act_composite_alpha2'] = self.model.lrp(R_init_act)

            print('...lrp (composite:alpha=2) for dominant classes')
            results['R_pred_dom_composite_alpha2'] = self.model.lrp(R_init_dom)

            # process lowest convolution layer with FLAT lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='flat')
                    break

            print('...lrp (composite:alpha=2+flat) for actual classes')
            results['R_pred_act_composite_alpha2_flat'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=2+flat) for dominant classes')
            results['R_pred_dom_composite_alpha2_flat'] = self.model.lrp(
                R_init_dom)

            #process lowest convolution layer with zB lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                    break

            print('...lrp (composite:alpha=2+zB) for actual classes')
            results['R_pred_act_composite_alpha2_zB'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=2+zB) for dominant classes')
            results['R_pred_dom_composite_alpha2_zB'] = self.model.lrp(
                R_init_dom)

            # switching alpha1beta0 for those layers (again ALL convolution layers)
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='alpha', param=1.0)

            print('...lrp (composite:alpha=1) for actual classes')
            results['R_pred_act_composite_alpha1'] = self.model.lrp(R_init_act)

            print('...lrp (composite:alpha=1) for dominant classes')
            results['R_pred_dom_composite_alpha1'] = self.model.lrp(R_init_dom)

            # process lowest convolution layer with FLAT lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='flat')
                    break

            print('...lrp (composite:alpha=1+flat) for actual classes')
            results['R_pred_act_composite_alpha1_flat'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=1+flat) for dominant classes')
            results['R_pred_dom_composite_alpha1_flat'] = self.model.lrp(
                R_init_dom)

            #process lowest convolution layer with zB lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                    break

            print('...lrp (composite:alpha=1+zB) for actual classes')
            results['R_pred_act_composite_alpha1_zB'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=1+zB) for dominant classes')
            results['R_pred_dom_composite_alpha1_zB'] = self.model.lrp(
                R_init_dom)

        print('...copying collected results to CPU and reshaping if necessary')
        # only values are replaced below, the key set is not changed while iterating
        for key in results.keys():
            # arrays_to_numpy is called with a single array here, its result is indexed with [0]
            tmp = helpers.arrays_to_numpy(results[key])[0]
            if key.startswith('R'):
                # relevance maps (keys starting with 'R') additionally pass the
                # model-specific postprocessing, which is indexed with [0] as well
                tmp = self.postprocess_relevance(tmp)[0]
            results[key] = tmp

        return results

예제 #9

0

파일 보기

파일: cnn.py 프로젝트: sebastian-lapuschkin/explaining-deep-gait-classification-gender

 def postprocess_relevance(self, *args):
     """
     convert the given relevance arrays via helpers.arrays_to_numpy and strip
     the trailing axis of each by indexing it at position 0.

     returns a tuple holding one processed array per converted input.
     """
     stripped = []
     for rel_map in helpers.arrays_to_numpy(*args):
         # explicitly select index 0 of the last axis (the dummy axis added
         # during preprocessing, per the original note)
         stripped.append(rel_map[..., 0])
     return tuple(stripped)