Example #1
0
    def __init__(self,
                 data_measure,
                 transfer_error,
                 bestdetector=None,
                 stopping_criterion=None,
                 feature_selector=None,
                 **kwargs
                 ):
        """Initialize incremental feature search.

        Parameters
        ----------
        data_measure : DatasetMeasure
          Computed for each candidate feature selection. The measure has
          to compute a scalar value.
        transfer_error : TransferError
          Compute against a test dataset for each incremental feature
          set.
        bestdetector : Functor
          Given a list of error values it has to return a boolean that
          signals whether the latest error value is the total minimum.
          Defaults to a fresh ``BestDetector()``.
        stopping_criterion : Functor
          Given a list of error values it has to return whether the
          criterion is fulfilled. Defaults to
          ``NBackHistoryStopCrit(BestDetector())``.
        feature_selector : Functor
          Selects feature ids from a measure at each step. Defaults to
          ``FixedNElementTailSelector(1, tail='upper', mode='select')``.
        """
        # bases init first
        FeatureSelection.__init__(self, **kwargs)

        # Default arguments are evaluated only once, at function definition
        # time; instantiating helper objects there would share one
        # (potentially stateful) instance across *all* instances of this
        # class.  Create fresh defaults per instance instead.
        if bestdetector is None:
            bestdetector = BestDetector()
        if stopping_criterion is None:
            stopping_criterion = NBackHistoryStopCrit(BestDetector())
        if feature_selector is None:
            feature_selector = FixedNElementTailSelector(1,
                                                         tail='upper',
                                                         mode='select')

        self.__data_measure = data_measure
        self.__transfer_error = transfer_error
        self.__feature_selector = feature_selector
        self.__bestdetector = bestdetector
        self.__stopping_criterion = stopping_criterion
Example #2
0
    def __init__(self,
                 data_measure,
                 transfer_error,
                 bestdetector=None,
                 stopping_criterion=None,
                 feature_selector=None,
                 **kwargs):
        """Initialize incremental feature search.

        Parameters
        ----------
        data_measure : DatasetMeasure
          Computed for each candidate feature selection. The measure has
          to compute a scalar value.
        transfer_error : TransferError
          Compute against a test dataset for each incremental feature
          set.
        bestdetector : Functor
          Given a list of error values it has to return a boolean that
          signals whether the latest error value is the total minimum.
          Defaults to a fresh ``BestDetector()``.
        stopping_criterion : Functor
          Given a list of error values it has to return whether the
          criterion is fulfilled. Defaults to
          ``NBackHistoryStopCrit(BestDetector())``.
        feature_selector : Functor
          Selects feature ids from a measure at each step. Defaults to
          ``FixedNElementTailSelector(1, tail='upper', mode='select')``.
        """
        # bases init first
        FeatureSelection.__init__(self, **kwargs)

        # Defaults are built here rather than in the signature: default
        # argument values are evaluated once at definition time, so the
        # detector/criterion/selector objects (which may carry history
        # state) would otherwise be shared across every instance.
        if bestdetector is None:
            bestdetector = BestDetector()
        if stopping_criterion is None:
            stopping_criterion = NBackHistoryStopCrit(BestDetector())
        if feature_selector is None:
            feature_selector = FixedNElementTailSelector(1,
                                                         tail='upper',
                                                         mode='select')

        self.__data_measure = data_measure
        self.__transfer_error = transfer_error
        self.__feature_selector = feature_selector
        self.__bestdetector = bestdetector
        self.__stopping_criterion = stopping_criterion
 def __init__(self, val=0, n_select=0.05):
     """Initialize combined t-test / ANOVA based feature selection.

     Parameters
     ----------
     val : scalar
       Passed to `TTestFeaturewiseMeasure` — presumably the value the
       per-feature t-test compares against (TODO confirm against
       `TTestFeaturewiseMeasure`'s signature).
     n_select : float or int
       If in the open interval (0, 1): fraction of features to select.
       If greater than 1: absolute number of features to select.
       Selection takes the lower tail, i.e. the smallest (most
       significant) combined probabilities.

     Raises
     ------
     ValueError
       If `n_select` is neither in (0, 1) nor > 1 (e.g. 0, 1, or
       negative).
     """
     FeatureSelection.__init__(self)
     self.__ttfm = TTestFeaturewiseMeasure(val)
     # Combine the t-test and one-way ANOVA probabilities per feature by
     # multiplying them.
     self._combinedSA = CombinedFeaturewiseDatasetMeasure(
         analyzers=[self.__ttfm, OneWayAnovaProbs()],
         combiner=np.prod)
     if 0 < n_select < 1:
         self._selector = FractionTailSelector(n_select, mode='select',
                                               tail='lower')
     elif n_select > 1:
         self._selector = FixedNElementTailSelector(int(n_select),
                                                    mode='select',
                                                    tail='lower')
     else:
         # n_select <= 0 and n_select == 1 are both rejected, so the
         # message spells out the accepted ranges instead of the old,
         # misleading "must be > 0".
         raise ValueError("n_select must be in (0, 1) or > 1, is %f"
                          % n_select)
     self.__fs = SensitivityBasedFeatureSelection(
                     self._combinedSA,
                     self._selector,
                     enable_states=["sensitivity", "selected_ids"],
                 )
    def __init__(self,
                 test_type = "ks",
                 num_features = 10,
                 **kwargs
                 ):
        """Initialize statistical-test based feature selection.

        :Parameters:
          test_type : 'anova', 'ttest', 'ks', 'svm'
            Type of statistical test to perform.
          num_features : int
            Number of features to include in the selection.

        """

        # base init first
        FeatureSelection.__init__(self, **kwargs)

        self.__test_type = test_type
        """Test-type, One of 'anova', 'ttest', 'ks', 'svm'"""

        self.__num_features = num_features
        """Number of features to include"""
Example #5
0
    def __init__(self,
                 sensitivity_analyzer,
                 transfer_error,
                 feature_selector=None,
                 bestdetector=None,
                 stopping_criterion=None,
                 train_clf=None,
                 update_sensitivity=True,
                 **kargs
                 ):
        # XXX Allow for multiple stopping criterions, e.g. error not decreasing
        # anymore OR number of features less than threshold
        """Initialize recursive feature elimination.

        :Parameters:
            sensitivity_analyzer : FeaturewiseDatasetMeasure object
            transfer_error : TransferError object
                used to compute the transfer error of a classifier based on a
                certain feature set on the test dataset.
                NOTE: If sensitivity analyzer is based on the same
                classifier as transfer_error is using, make sure you
                initialize transfer_error with train=False, otherwise
                it would train classifier twice without any necessity.
            feature_selector : Functor
                Given a sensitivity map it has to return the ids of those
                features that should be kept. Defaults to
                `FractionTailSelector(0.05)`.
            bestdetector : Functor
                Given a list of error values it has to return a boolean that
                signals whether the latest error value is the total minimum.
                Defaults to a fresh `BestDetector()`.
            stopping_criterion : Functor
                Given a list of error values it has to return whether the
                criterion is fulfilled. Defaults to
                `NBackHistoryStopCrit(BestDetector())`.
            train_clf : bool
                Flag whether the classifier in `transfer_error` should be
                trained before computing the error. In general this is
                required, but if the `sensitivity_analyzer` and
                `transfer_error` share and make use of the same classifier it
                can be switched off to save CPU cycles. Default `None` checks
                if sensitivity_analyzer is based on a classifier and doesn't train
                if so.
            update_sensitivity : bool
                If False the sensitivity map is only computed once and reused
                for each iteration. Otherwise the sensitivities are
                recomputed at each selection step.
        """

        # base init first
        FeatureSelection.__init__(self, **kargs)

        # Default arguments are evaluated once at function definition time;
        # building the (potentially stateful) helper objects there would
        # share single instances across all RFE objects.  Instantiate per
        # instance instead.
        if feature_selector is None:
            feature_selector = FractionTailSelector(0.05)
        if bestdetector is None:
            bestdetector = BestDetector()
        if stopping_criterion is None:
            stopping_criterion = NBackHistoryStopCrit(BestDetector())

        self.__sensitivity_analyzer = sensitivity_analyzer
        """Sensitivity analyzer used to call at each step."""

        self.__transfer_error = transfer_error
        """Compute transfer error for each feature set."""

        self.__feature_selector = feature_selector
        """Functor which takes care about removing some features."""

        self.__stopping_criterion = stopping_criterion

        self.__bestdetector = bestdetector

        # Flag whether training the classifier is required.  When not given
        # explicitly, assume a classifier-based sensitivity analyzer leaves
        # the classifier trained already.
        if train_clf is None:
            self.__train_clf = isinstance(sensitivity_analyzer,
                                          Sensitivity)
        else:
            self.__train_clf = train_clf

        self.__update_sensitivity = update_sensitivity
        """Flag whether sensitivity map is recomputed for each step."""

        # force clf training when sensitivities are not updated as otherwise
        # shared classifiers are not retrained
        if not self.__update_sensitivity \
               and isinstance(self.__transfer_error, ClassifierError) \
               and not self.__train_clf:
            if __debug__:
                debug("RFEC", "Forcing training of classifier since " +
                      "sensitivities aren't updated at each step")
            self.__train_clf = True
Example #6
0
    def __init__(self,
                 sensitivity_analyzer,
                 transfer_error,
                 feature_selector=None,
                 bestdetector=None,
                 stopping_criterion=None,
                 train_clf=None,
                 update_sensitivity=True,
                 **kargs
                 ):
        # XXX Allow for multiple stopping criterions, e.g. error not decreasing
        # anymore OR number of features less than threshold
        """Initialize recursive feature elimination.

        Parameters
        ----------
        sensitivity_analyzer : FeaturewiseDatasetMeasure object
        transfer_error : TransferError object
          used to compute the transfer error of a classifier based on a
          certain feature set on the test dataset.
          NOTE: If sensitivity analyzer is based on the same
          classifier as transfer_error is using, make sure you
          initialize transfer_error with train=False, otherwise
          it would train classifier twice without any necessity.
        feature_selector : Functor
          Given a sensitivity map it has to return the ids of those
          features that should be kept. Defaults to
          ``FractionTailSelector(0.05)``.
        bestdetector : Functor
          Given a list of error values it has to return a boolean that
          signals whether the latest error value is the total minimum.
          Defaults to a fresh ``BestDetector()``.
        stopping_criterion : Functor
          Given a list of error values it has to return whether the
          criterion is fulfilled. Defaults to
          ``NBackHistoryStopCrit(BestDetector())``.
        train_clf : bool
          Flag whether the classifier in `transfer_error` should be
          trained before computing the error. In general this is
          required, but if the `sensitivity_analyzer` and
          `transfer_error` share and make use of the same classifier it
          can be switched off to save CPU cycles. Default `None` checks
          if sensitivity_analyzer is based on a classifier and doesn't train
          if so.
        update_sensitivity : bool
          If False the sensitivity map is only computed once and reused
          for each iteration. Otherwise the sensitivities are
          recomputed at each selection step.
        """

        # base init first
        FeatureSelection.__init__(self, **kargs)

        # Instantiate helper objects lazily: default argument values are
        # evaluated only once, at definition time, so stateful selectors/
        # detectors/criteria would otherwise be shared by every instance.
        if feature_selector is None:
            feature_selector = FractionTailSelector(0.05)
        if bestdetector is None:
            bestdetector = BestDetector()
        if stopping_criterion is None:
            stopping_criterion = NBackHistoryStopCrit(BestDetector())

        self.__sensitivity_analyzer = sensitivity_analyzer
        """Sensitivity analyzer used to call at each step."""

        self.__transfer_error = transfer_error
        """Compute transfer error for each feature set."""

        self.__feature_selector = feature_selector
        """Functor which takes care about removing some features."""

        self.__stopping_criterion = stopping_criterion

        self.__bestdetector = bestdetector

        # Flag whether training the classifier is required.  When not
        # specified, classifier-based sensitivity analyzers are assumed to
        # leave their classifier trained, so no extra training is done.
        if train_clf is None:
            self.__train_clf = isinstance(sensitivity_analyzer,
                                          Sensitivity)
        else:
            self.__train_clf = train_clf

        self.__update_sensitivity = update_sensitivity
        """Flag whether sensitivity map is recomputed for each step."""

        # force clf training when sensitivities are not updated as otherwise
        # shared classifiers are not retrained
        if not self.__update_sensitivity \
               and isinstance(self.__transfer_error, ClassifierError) \
               and not self.__train_clf:
            if __debug__:
                debug("RFEC", "Forcing training of classifier since " +
                      "sensitivities aren't updated at each step")
            self.__train_clf = True