Example #1
 def __init__(self, attr, count=1, limit=None, assure=False, **kwargs):
     """
     Parameters
     ----------
     attr : str or list(str)
       Name of the to-be-permuted attribute. This can also be a list of
       attribute names, in which case the *identical* shuffling is applied to
       all listed attributes.
     count : int
        Number of permutations to be yielded by ``.generate()``.
     limit : None or str or dict
        If ``None`` all attribute values will be permuted. If a single
        attribute name is given, its unique values will be used to define
        chunks of data that are permuted individually (i.e. no attribute
        values will be exchanged across chunks). Finally, if a dictionary is
        provided, its keys define attribute names and its values (a single
        value or a sequence thereof) attribute values, where all key-value
        combinations across all given items define a "selection" of
        to-be-permuted samples or features.
     assure : bool
       If set, by-chance non-permutations will be prevented, i.e. it is
       checked that at least two items change their position. Since this
       check adds a runtime penalty it is off by default.
     """
     Node.__init__(self, **kwargs)
     self._pattr = attr
     self.nruns = count
     self._limit = limit
     self._pcfg = None
     self._assure_permute = assure
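The class statement is cropped from this snippet; the signature matches PyMVPA's AttributePermutator, so that name and its import path are assumed in this minimal usage sketch:

    # Sketch: class name and import path are assumptions.
    from mvpa2.generators.permutation import AttributePermutator
    from mvpa2.misc.data_generators import normal_feature_dataset

    ds = normal_feature_dataset(perlabel=10, nlabels=2, nchunks=5)
    permutator = AttributePermutator('targets', count=3, limit='chunks',
                                     assure=True)
    for permuted in permutator.generate(ds):
        print(permuted.sa.targets)  # 'targets' shuffled within each chunk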
Example #2
 def __init__(self,
              count=None,
              selection_strategy='equidistant',
              attr='chunks',
              space='partitions',
              **kwargs):
     """
     Parameters
     ----------
     count : None or int
       Desired number of splits to be output. It is limited by the
       number of splits possible for a given splitter
       (e.g. `OddEvenSplitter` can have only up to 2 splits). If None,
       all splits are output (default).
     selection_strategy : str
        If `count` is not None, the following strategies are possible:
       'first': First `count` splits are chosen;
       'random': Random (without replacement) `count` splits are chosen;
       'equidistant': Splits which are equidistant from each other.
     attr : str
       Sample attribute used to determine splits.
     space : str
        Name of the to-be-created sample attribute defining the partitions.
       In addition, a dataset attribute named '`space`_set' will be added
       to each output dataset, indicating the number of the partition set
       it corresponds to.
     """
     Node.__init__(self, space=space, **kwargs)
     # pylint happiness block
     self.__splitattr = attr
     # we don't check it, thus no reason to make it private.
     # someone might find it useful to change it post-creation.
     # TODO: utilize such (or a similar) policy throughout the code
     self.count = count
     self._set_selection_strategy(selection_strategy)
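This signature matches PyMVPA's Partitioner base class (an assumption, as the class statement is not shown); concrete subclasses such as NFoldPartitioner forward the same keyword arguments:

    # Sketch: pick 2 random partitionings out of the possible one-chunk-out
    # folds; NFoldPartitioner and its import path are assumptions.
    from mvpa2.generators.partition import NFoldPartitioner

    partitioner = NFoldPartitioner(count=2, selection_strategy='random',
                                   attr='chunks')
    for part in partitioner.generate(ds):   # ds as in the Example #1 sketch
        print(part.sa.partitions)           # e.g. 1 = training, 2 = testing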
Example #3
File: fx.py Project: esc/PyMVPA
 def __init__(self, fx, space, **kwargs):
     """
     Parameters
     ----------
     fx : callable
        Callable invoked with the dataset samples as its first argument and
        the attribute values as its second.
     space : str
        Name of the sample attribute that contains the target values.
     """
     Node.__init__(self, space=space, **kwargs)
     self.fx = fx
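A plausible usage sketch, assuming this fx.py snippet is PyMVPA's BinaryFxNode (class name and import paths are assumptions):

    # Sketch: compare dataset samples (e.g. predictions) against the values
    # of the 'targets' sample attribute via the supplied callable.
    from mvpa2.measures.fx import BinaryFxNode
    from mvpa2.misc.errorfx import mean_mismatch_error

    err = BinaryFxNode(mean_mismatch_error, space='targets')
    result = err(ds)  # fraction of samples mismatching their targets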
Example #4
File: base.py Project: esc/PyMVPA
 def __init__(self, count, space='repetitions', **kwargs):
     """
     Parameters
     ----------
     count : int
        Positive integer that sets the number of repetitions.
     space : str
        The name of the dataset attribute that will hold the actual repetition
        number in the yielded datasets.
     """
     Node.__init__(self, space=space, **kwargs)
     self.nruns = count
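A usage sketch, assuming this base.py snippet is PyMVPA's Repeater; the space name is passed explicitly rather than relying on the default:

    # Sketch: yield the same dataset `count` times, tagging each copy.
    from mvpa2.generators.base import Repeater

    repeater = Repeater(count=3, space='repetitions')
    for i, rep in enumerate(repeater.generate(ds)):
        print(i, rep.a.repetitions)  # dataset attribute tracks the iteration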
Example #5
File: base.py Project: esc/PyMVPA
 def __init__(self, includes, *args, **kwargs):
     """
     Parameters
     ----------
     includes : list
        List of rules as tuples ``(attribute, unique_values)``, where all
        listed 'unique_values' must be present in the dataset.
        Matching samples or features get selected to proceed to the
        next rule in the list.  If at some point not all listed
        values of the attribute are present, the dataset does not pass
        through the 'Sifter'.
     """
     Node.__init__(self, *args, **kwargs)
     self._includes = includes
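A usage sketch, assuming this is PyMVPA's Sifter; chained after a partitioner, it passes along only those partitioned datasets whose testing partition still contains all required target values (attribute and target names here are illustrative):

    # Sketch: yield only datasets where partition 2 contains both targets.
    from mvpa2.generators.base import Sifter

    sifter = Sifter([('partitions', 2),
                     ('targets', ['L0', 'L1'])])
    for ok_ds in sifter.generate(part):  # part: a partitioned dataset
        print(ok_ds)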
Example #6
 def __init__(self,
              amount='equal',
              attr='targets',
              count=1,
              limit='chunks',
              apply_selection=False,
              space='balanced_set',
              **kwargs):
     """
     Parameters
     ----------
     amount : {'equal'} or int or float
       Specify the amount of elements to be selected (within the current
       ``limit``). The amount can be given as an integer value corresponding
       to the absolute number of elements per unique attribute (see ``attr``)
       value, as a float corresponding to the fraction of elements, or with
        the keyword 'equal'. In the latter case, the number of elements to be
        selected is determined by the least number of available elements for
       any given unique attribute value within the current limit.
     attr : str
       Dataset attribute whose unique values define element classes that are
       to be balanced in number.
     count : int
       How many iterations to perform on ``generate()``.
     limit : None or str or dict
        If ``None`` the whole dataset is considered as one. If a single
       attribute name is given, its unique values will be used to define
       chunks of data that are balanced individually. Finally, if a
       dictionary is provided, its keys define attribute names and its values
        (a single value or a sequence thereof) attribute values, where all
       key-value combinations across all given items define a "selection" of
       to-be-balanced samples or features.
     apply_selection : bool
       Flag whether the balanced selection shall be applied, i.e. the output
       dataset only contains selected elements. If False, the selection is
       instead added as an attribute that merely marks selected elements (see
       ``space`` argument).
     space : str
       Name of the selection marker attribute in the output dataset that is
       created if the balanced selection is not applied to the output dataset
       (see ``apply_selection`` argument).
     """
     Node.__init__(self, space=space, **kwargs)
     self._amount = amount
     self._attr = attr
     self.nruns = count
     self._limit = limit
     self._limit_filter = None
     self._apply_selection = apply_selection
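A usage sketch, assuming this is PyMVPA's Balancer (import path assumed):

    # Sketch: select an equal number of samples per target value within each
    # chunk and apply the selection, rather than just marking it.
    from mvpa2.generators.resampling import Balancer

    bal = Balancer(amount='equal', attr='targets', limit='chunks',
                   apply_selection=True)
    for balanced in bal.generate(ds):  # count=1, so a single iteration
        print(len(balanced))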
Example #7
 def __init__(self, auto_train=False, force_train=False, **kwargs):
     """
     Parameters
     ----------
     auto_train : bool
       Flag whether the learner will automatically train itself on the input
       dataset when called untrained.
     force_train : bool
       Flag whether the learner will enforce training on the input dataset
       upon every call.
     **kwargs
       All arguments are passed to the baseclass.
     """
     Node.__init__(self, **kwargs)
     self.__is_trained = False
     self.__auto_train = auto_train
     self.__force_train = force_train
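A minimal subclass sketch, assuming this is PyMVPA's Learner base class and that _train()/_call() are the override points (only __init__ is shown above, so both assumptions should be checked against the source):

    # Sketch: a learner that memorizes the global sample mean when trained.
    from mvpa2.base.learner import Learner

    class MeanCenterer(Learner):
        def _train(self, ds):
            self._mean = ds.samples.mean()

        def _call(self, ds):
            return ds.samples - self._mean

    centerer = MeanCenterer(auto_train=True)  # trains itself on first call
    centered = centerer(ds)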
Example #8
 def __init__(self, space, prestrip, poststrip, **kwargs):
     """
     Parameters
     ----------
     space : str
        Name of the sample attribute that shall be used to determine the
       boundaries.
     prestrip : int
       Number of samples to be stripped prior to each boundary.
     poststrip : int
       Number of samples to be stripped after each boundary (this includes
        the boundary sample itself, i.e. the first sample with a different
       sample attribute value).
     """
     Node.__init__(self, space=space, **kwargs)
     self._prestrip = prestrip
     self._poststrip = poststrip
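The class name is cropped from this snippet; instead of guessing it, here is a self-contained illustration of the pre/post-strip logic the docstring describes, using a plain attribute array:

    import numpy as np

    attr = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])  # e.g. a 'chunks' attribute
    prestrip, poststrip = 1, 2
    keep = np.ones(len(attr), dtype=bool)
    # A boundary is the index of the first sample with a new attribute value.
    boundaries = np.where(np.diff(attr) != 0)[0] + 1
    for b in boundaries:
        keep[max(0, b - prestrip):b] = False  # strip samples before boundary
        keep[b:b + poststrip] = False         # strip boundary sample and after
    print(keep)  # [ True  True False False False False False False  True]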
Example #9
 def __init__(self, attr, attr_values=None, count=None, noslicing=False,
              reverse=False, ignore_values=None, **kwargs):
     """
     Parameters
     ----------
     attr : str
       Typically the sample or feature attribute used to determine splits.
     attr_values : tuple
        If not None, this is a list of values of ``attr`` used to determine
        the splits. The order of values in this list defines the order of the
       resulting splits. It is possible to specify a particular value
       multiple times. All dataset samples with values that are not listed
       are going to be ignored.
     count : None or int
        Desired number of generated splits. If None, all splits are output
        (default); otherwise the number of splits is limited to the given
        ``count`` or the maximum number of possible splits (whichever is
        less).
     noslicing : bool
       If True, dataset splitting is not done by slicing (causing
       shared data between source and split datasets) even if it would
       be possible. By default slicing is performed whenever possible
       to reduce the memory footprint.
     reverse : bool
        If True, the order of datasets in the split is reversed, e.g.
        instead of (training, testing), (testing, training) will be
        output.
     ignore_values : tuple
        If not None, this is a list of values of ``attr`` that shall be
        ignored when determining the splits. This setting also affects
       any specified ``attr_values``.
     """
     Node.__init__(self, space=attr, **kwargs)
     self.__splitattr_values = attr_values
     self.__splitattr_ignore = ignore_values
     self.__count = count
     self.__noslicing = noslicing
     self.__reverse = reverse
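A usage sketch, assuming this is PyMVPA's Splitter (import path assumed), typically chained after a partitioner:

    # Sketch: split a partitioned dataset into its partition values 1 and 2,
    # in that order (training first, then testing).
    from mvpa2.generators.splitters import Splitter

    splitter = Splitter('partitions', attr_values=[1, 2])
    for split in splitter.generate(part):  # part as in the Example #2 sketch
        print(len(split), split.sa.partitions[0])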