def __init__(self, bands, border=2, baseline_interval=None,
             data_interval=None, Fs=1000.0, *args, **kwargs):
    """Initialize the FrequencyBandPowerMapper

    :Parameters:
      bands: list of 2-tuples
        list of (fl,fh), edge-frequencies for filtering
      border: int
        order of the filter to be used
      baseline_interval: 2-tuple of ints
        indices of the interval to be used for baseline correction
      data_interval: 2-tuple of ints
        indices of the interval to be used for cluster search
      Fs: float
        Both edges of every band are divided by ``Fs / 2`` before they
        are handed to `butter` (presumably the sampling rate, expressed
        in the same unit as the band edges -- confirm with callers).
      args, kwargs:
        additional arguments passed to parent class
    """
    Mapper.__init__(self, *args, **kwargs)
    self._bands = bands
    self._border = border
    self._baseline_interval = baseline_interval
    self._data_interval = data_interval
    self._Fs = Fs
    #TODO: Perform some checks, for valid intervals etc.

    # Precalculate filter parameters (butter).
    # Force true division: with an integer `Fs` and integer band edges
    # the normalized frequencies would otherwise be truncated (to 0) by
    # Python 2 integer division.
    nyquist = float(Fs) / 2
    self._bs = []
    self._as = []
    for band in bands:
        b_coef, a_coef = butter(border,
                                [band[0] / nyquist, band[1] / nyquist],
                                btype="band")
        self._bs.append(b_coef)
        self._as.append(a_coef)
def __init__(self, node, nodeargs=None, inspace=None):
    """
    Parameters
    ----------
    node : mdp.Node instance
      Pristine source node; a copy of it is made for actual processing
      upon each training attempt.
    nodeargs : dict
      Additional arguments for all calls to the MDP node. The keys
      select the call the arguments are meant for:

      'train'
        Arguments for calls to `Node.train()`
      'stoptrain'
        Arguments for calls to `Node.stop_training()`
      'exec'
        Arguments for calls to `Node.execute()`
      'inv'
        Arguments for calls to `Node.inverse()`

      Every value is a 2-tuple made of a tuple (positional arguments)
      and a dictionary (keyword arguments), i.e. ((), {}). Both parts
      have to be provided even if they are empty.
    inspace : see base class

    Raises
    ------
    ValueError
      If `node` has anything other than exactly one training phase.
    """
    # TODO: starting from MDP2.5 this check should become:
    # TODO:   if node.has_multiple_training_phases():
    n_phases = len(node._train_seq)
    if n_phases != 1:
        raise ValueError("MDPNodeMapper does not support MDP nodes with "
                         "multiple training phases.")

    Mapper.__init__(self, inspace=inspace)

    self.__pristine_node = None
    self.node = node
    self.nodeargs = nodeargs
def __init__(self, num, window=None, chunks_attr=None,
             position_attr=None, attr_strategy='remove', inspace=None):
    """
    Parameters
    ----------
    num : int
      Number of output samples. When operating on chunks, this is the
      number of samples per chunk.
    window : str or float or tuple
      Passed to scipy.signal.resample
    chunks_attr : str or None
      If not None, this samples attribute defines chunks that will be
      resampled individually.
    position_attr : str
      A samples attribute with positional information that is passed
      to scipy.signal.resample. If not None, the output dataset will
      also contain a sample attribute of this name, with updated
      positional information (this is, however, only meaningful for
      equally spaced samples).
    attr_strategy : {'remove', 'sample', 'resample'}
      Strategy to process sample attributes during mapping. 'remove'
      causes all sample attributes to be removed. 'sample' picks
      original attribute values matching the new resampling frequency
      (e.g. every 10th), and 'resample' applies the actual data
      resampling procedure to the attributes as well (which might not
      be possible, e.g. for literal attributes).
    inspace
      Forwarded unchanged to ``Mapper.__init__``.
    """
    Mapper.__init__(self, inspace=inspace)

    # plain configuration storage; nothing is validated here
    self.__num = num
    self.__window_args = window
    self.__chunks_attr = chunks_attr
    self.__position_attr = position_attr
    self.__attr_strategy = attr_strategy
def __init__(self, params=None, param_est=None, chunks_attr="chunks",
             dtype="float64", **kwargs):
    """
    Parameters
    ----------
    params : None or tuple(mean, std) or dict
      Fixed Z-Scoring parameters (mean, standard deviation). If
      provided, no parameters are estimated from the data. Individual
      parameters for each chunk can be specified by passing a
      dictionary with the chunk ids as keys and the parameter tuples
      as values. If None, parameters will be estimated from the
      training data.
    param_est : None or tuple(attrname, attrvalues)
      Limits the choice of samples used for automatic parameter
      estimation to a specific subset identified by a set of a given
      sample attribute values. The tuple should have the name of that
      sample attribute as the first element, and a sequence of
      attribute values as the second element. If None, all samples
      will be used for parameter estimation.
    chunks_attr : str or None
      If provided, it specifies the name of a samples attribute in the
      training data, unique values of which will be used to identify
      chunks of samples, and to perform individual Z-scoring within
      them.
    dtype : Numpy dtype, optional
      Target dtype that is used for upcasting, in case integer data is
      to be Z-scored.
    **kwargs
      Forwarded unchanged to ``Mapper.__init__``.
    """
    Mapper.__init__(self, **kwargs)

    self.__chunks_attr = chunks_attr
    self.__params = params
    self.__param_est = param_est
    self.__params_dict = None
    self.__dtype = dtype

    # secret switch to perform in-place z-scoring
    self._secret_inplace_zscore = False
def __init__(self, selector=None, demean=True):
    """Initialize the ProjectionMapper

    Parameters
    ----------
    selector : None or list
      Which components (i.e. columns of the projection matrix)
      should be used for mapping. If `selector` is `None` all
      components are used. If a list is provided, all list elements
      are treated as component ids and the respective components are
      selected (all others are discarded).
    demean : bool
      Whether data should be demeaned while computing projections,
      with the mean applied back while doing reverse().
    """
    Mapper.__init__(self)

    # by default we want to wipe the feature attributes out during mapping
    self._fa_filter = []

    self._selector = selector

    self._proj = None
    """Forward projection matrix."""

    self._recon = None
    """Reverse projection (reconstruction) matrix."""

    self._demean = demean
    """Flag whether to demean the to be projected data, prior to projection.
    """

    self._offset_in = None
    """Offset (most often just mean) in the input space"""

    self._offset_out = None
    """Offset (most often just mean) in the output space"""
def __init__(self, dim=1, wavelet='sym4', mode='per', maxlevel=None):
    """Initialize _WaveletMapper mapper

    :Parameters:
      dim : int or tuple of int
        dimensions to work across (for now just scalar value, ie 1D
        transformation) is supported
      wavelet : basestring
        one from the families available within pywt package
      mode : basestring
        periodization mode
      maxlevel : int or None
        number of levels to use. If None - automatically selected by pywt

    :Raises:
      ValueError
        if `wavelet` is not in ``pywt.wavelist()`` or `mode` is not in
        ``pywt.MODES.modes``
    """
    Mapper.__init__(self)

    self._dim = dim
    """Dimension to work along"""

    self._maxlevel = maxlevel
    """Maximal level of decomposition. None for automatic"""

    # Call-style raise: identical behavior, but also valid on Python 3
    # and consistent with the other mappers.
    if wavelet not in pywt.wavelist():
        raise ValueError(
            "Unknown family of wavelets '%s'. Please use one "
            "available from the list %s" % (wavelet, pywt.wavelist()))
    self._wavelet = wavelet
    """Wavelet family to use"""

    if mode not in pywt.MODES.modes:
        raise ValueError(
            "Unknown periodization mode '%s'. Please use one "
            "available from the list %s" % (mode, pywt.MODES.modes))
    self._mode = mode
    """Periodization mode"""
def __init__(self, axis, fx, fxargs=None, uattrs=None, attrfx='merge'):
    """
    Parameters
    ----------
    axis : {'samples', 'features'}
    fx : callable
    fxargs : tuple
      Stored alongside `fx`; an empty tuple is used when None is given.
    uattrs : list
      List of attribute names to consider. All possible combinations
      of unique elements of these attributes are used to determine the
      sample groups to operate on.
    attrfx : callable or 'merge' or None
      Functor that is called with each sample attribute elements
      matching the respective samples group. By default ('merge') the
      unique value is determined. If the content of the attribute is
      not uniform for a samples group a unique string representation
      is created. If `None`, attributes are not altered.

    Raises
    ------
    ValueError
      If `axis` is neither 'samples' nor 'features'.
    """
    Mapper.__init__(self)

    if axis not in ('samples', 'features'):
        # The message has two placeholders (class name and offending
        # value); supplying only one argument made this raise a
        # TypeError instead of the intended ValueError.
        raise ValueError("%s `axis` arguments can only be 'samples' or "
                         "'features' (got: '%s')."
                         % (self.__class__.__name__, axis))

    self.__axis = axis
    self.__uattrs = uattrs
    self.__fx = fx

    if fxargs is not None:
        self.__fxargs = fxargs
    else:
        self.__fxargs = ()

    # 'merge' is a shortcut for the default merging functor
    if attrfx == 'merge':
        self.__attrfx = _uniquemerge2literal
    else:
        self.__attrfx = attrfx
def __init__(self, num_surrogates=10, blocksize=None, *args, **kwargs):
    """Set up an untrained mapper.

    :Parameters:
      num_surrogates: int
        stored as ``_num_surrogates``
      blocksize:
        stored as ``_blocksize``
      args, kwargs:
        additional arguments passed to parent class
    """
    Mapper.__init__(self, *args, **kwargs)

    self._num_surrogates = num_surrogates
    self._blocksize = blocksize

    # Will become list of tuples (channel, band, (start,end), probability)
    self._clusters = []
    self._is_trained = False

    # Shape for each sample of the data this mapper was trained on
    self._sample_shape = None
def __init__(self, num_surrogates=10, blocksize=None, *args, **kwargs):
    """Set up an untrained mapper.

    :Parameters:
      num_surrogates: int
        stored as ``_num_surrogates``
      blocksize:
        stored as ``_blocksize``
      args, kwargs:
        additional arguments passed to parent class
    """
    Mapper.__init__(self, *args, **kwargs)

    self._num_surrogates = num_surrogates
    self._blocksize = blocksize

    # Will become list of tuples (channel, band, (start,end), probability)
    self._clusters = []
    self._is_trained = False

    # Shape for each sample of the data this mapper was trained on
    self._sample_shape = None
def __init__(self, shape=None, **kwargs):
    """
    Parameters
    ----------
    shape : tuple
      The shape of a single sample. When given, the mapper is fully
      configured right away and no training is necessary anymore.
    **kwargs
      Forwarded unchanged to ``Mapper.__init__``.
    """
    Mapper.__init__(self, **kwargs)

    self.__origshape = None
    self.__nfeatures = None

    # a known sample shape is all that training would provide
    if shape is not None:
        self._train_with_shape(shape)
def __init__(self, shape=None, **kwargs):
    """
    Parameters
    ----------
    shape : tuple
      The shape of a single sample. If this argument is given the
      mapper is going to be fully configured and no training is
      necessary anymore.
    **kwargs
      Forwarded to ``Mapper.__init__``. ``auto_train`` defaults to
      True but may be overridden by the caller.
    """
    # Only supply the default: passing `auto_train=True` explicitly
    # next to **kwargs raised a TypeError ("multiple values for keyword
    # argument") whenever a caller provided `auto_train` itself.
    kwargs.setdefault('auto_train', True)
    Mapper.__init__(self, **kwargs)

    self.__origshape = None
    self.__nfeatures = None

    if shape is not None:
        self._train_with_shape(shape)
def __init__(self, mask, **kwargs):
    """Initialize MaskMapper

    :Parameters:
      mask : array
        an array in the original dataspace and its nonzero elements are
        used to define the features included in the dataset
      kwargs
        passed on to ``Mapper.__init__``
    """
    Mapper.__init__(self, **kwargs)

    # to make pylint happy: declare every private attribute up front,
    # the actual values are filled in by _initMask()
    self.__mask = None
    self.__maskdim = None
    self.__masksize = None
    self.__masknonzerosize = None
    self.__forwardmap = None
    self.__masknonzero = None

    self._initMask(mask)
def __init__(self, polyord=1, chunks_attr=None, opt_regs=None,
             inspace=None):
    """
    Parameters
    ----------
    polyord : int or list, optional
      Order of the Legendre polynomial to remove from the data. This
      will remove every polynomial up to and including the provided
      value. For example, 3 will remove 0th, 1st, 2nd, and 3rd order
      polynomials from the data. N.B.: The 0th polynomial is the
      baseline shift, the 1st is the linear trend.
      If you specify a single int and `chunks_attr` is not None, then
      this value is used for each chunk. You can also specify a
      different polyord value for each chunk by providing a list or
      ndarray of polyord values the length of the number of chunks.
    chunks_attr : str or None
      If None, the whole dataset is detrended at once. Otherwise, the
      given samples attribute (given by its name) is used to define
      chunks of the dataset that are processed individually. In that
      case, all the samples within a chunk should be in contiguous
      order and the chunks should be sorted in order from low to
      high -- unless the dataset provides information about the
      coordinate of each sample in the space that should be spanned
      by the polynomials (see `inspace` argument).
    opt_regs : list or None
      Optional list of sample attribute names that should be used as
      additional regressors. One example would be to regress out
      motion parameters.
    inspace : str or None
      If not None, a samples attribute of the same name is added to
      the mapped dataset that stores the coordinates of each sample in
      the space that is spanned by the polynomials. If an attribute of
      that name is already present in the input dataset its values are
      interpreted as sample coordinates in the space that should be
      spanned by the polynomials.
    """
    Mapper.__init__(self, inspace=inspace)

    self.__chunks_attr = chunks_attr
    self.__polyord = polyord
    self.__opt_reg = opt_regs

    # things that come from train()
    self._polycoords = self._regs = None

    # secret switch to perform in-place detrending
    self._secret_inplace_detrend = False
def __init__(self, polyord=1, chunks_attr=None, opt_regs=None, **kwargs):
    """
    Parameters
    ----------
    polyord : int or list, optional
      Order of the Legendre polynomial to remove from the data. This
      will remove every polynomial up to and including the provided
      value. For example, 3 will remove 0th, 1st, 2nd, and 3rd order
      polynomials from the data. N.B.: The 0th polynomial is the
      baseline shift, the 1st is the linear trend.
      If you specify a single int and `chunks_attr` is not None, then
      this value is used for each chunk. You can also specify a
      different polyord value for each chunk by providing a list or
      ndarray of polyord values the length of the number of chunks.
    chunks_attr : str or None
      If None, the whole dataset is detrended at once. Otherwise, the
      given samples attribute (given by its name) is used to define
      chunks of the dataset that are processed individually. In that
      case, all the samples within a chunk should be in contiguous
      order and the chunks should be sorted in order from low to
      high -- unless the dataset provides information about the
      coordinate of each sample in the space that should be spanned
      by the polynomials (see `space` argument).
    opt_regs : list or None
      Optional list of sample attribute names that should be used as
      additional regressors. One example would be to regress out
      motion parameters.
    space : str or None
      Not a named parameter of this method; it can only arrive through
      `**kwargs`, which is forwarded to ``Mapper.__init__``
      (NOTE(review): confirm the base class accepts this keyword).
      If not None, a samples attribute of the same name is added to
      the mapped dataset that stores the coordinates of each sample in
      the space that is spanned by the polynomials. If an attribute of
      that name is already present in the input dataset its values are
      interpreted as sample coordinates in the space that should be
      spanned by the polynomials.
    """
    self.__chunks_attr = chunks_attr
    self.__polyord = polyord
    self.__opt_reg = opt_regs

    # things that come from train()
    self._polycoords = self._regs = None

    # secret switch to perform in-place detrending
    self._secret_inplace_detrend = False

    # need to init last to prevent base class puking
    Mapper.__init__(self, **kwargs)
def __repr__(self):
    """Return the base-class repr extended by this mapper's settings.

    Trailing blanks and closing parentheses are stripped from the base
    repr, then `kshape`, `niter`, `learning_rate` and `iradius` are
    appended and the parenthesis is closed again.
    """
    head = Mapper.__repr__(self).rstrip(' )')
    # beautify: separate from whatever arguments the base repr lists
    if head[-1] != '(':
        head += ' '
    tail = 'kshape=%s, niter=%i, learning_rate=%f, iradius=%f)' % (
        str(tuple(self.kshape)), self.niter, self.lrate, self.radius)
    return head + tail
def __init__(self, shape=None, maxdims=None, **kwargs):
    """
    Parameters
    ----------
    shape : tuple
      The shape of a single sample. When given, the mapper is fully
      configured right away and no training is necessary anymore.
    maxdims : int or None
      The maximum number of dimensions to flatten (starting with the
      first). If None, all axes will be flattened.
    **kwargs
      Forwarded to ``Mapper.__init__``; ``auto_train`` is set to True
      unless the caller provides it.
    """
    # by default auto train
    kwargs.setdefault('auto_train', True)
    Mapper.__init__(self, **kwargs)

    self.__origshape = None         # pylint pacifier
    self.__maxdims = maxdims

    if shape is not None:
        self._train_with_shape(shape)
def __init__(self, kshape, niter, learning_rate=0.005, iradius=None):
    """
    Parameters
    ----------
    kshape : (int, int)
      Shape of the internal Kohonen layer. Currently, only 2D Kohonen
      layers are supported, although the length of an axis might be
      set to 1.
    niter : int
      Number of iterations during network training.
    learning_rate : float
      Initial learning rate, which will be continuously decreased
      during network training.
    iradius : float or None
      Initial radius of the Gaussian neighborhood kernel, which will
      be continuously decreased during network training. If `None`
      (default) the radius is set equal to the longest edge of the
      Kohonen layer.
    """
    # init base class
    Mapper.__init__(self)

    self.kshape = np.array(kshape, dtype='int')

    # fall back to the longest edge of the layer
    self.radius = self.kshape.max() if iradius is None else iradius

    # learning rate
    self.lrate = learning_rate

    # number of training iterations
    self.niter = niter

    # precompute whatever can be done
    # scalar for decay of learning rate and radius across all iterations
    self.iter_scale = self.niter / np.log(self.radius)

    # the internal kohonen layer
    self._K = None
def __init__(self, k, algorithm='lle', **kwargs):
    """
    :Parameters:
      k: int
        Number of nearest neighbors to be used by the algorithm.
      algorithm: 'lle' | 'hlle'
        Either use the standard LLE algorithm or Hessian Linear Local
        Embedding (HLLE).
      **kwargs:
        Additional arguments are passed to the underlying MDP node.
        Most importantly this is the `output_dim` argument, that
        determines the number of dimensions the mapper is using as
        output space.
    """
    # no meaningful metric
    Mapper.__init__(self, metric=None)

    self._k = k
    self._algorithm = algorithm
    # kept as-is; not consumed in this method
    self._node_kwargs = kwargs
    self._node = None
def __init__(self, startpoints, boxlength, offset=0,
             collision_resolution='mean'):
    """
    :Parameters:
      startpoints: sequence
        Index values along the first axis of 'data'.
      boxlength: int
        The number of elements after 'startpoint' along the first axis
        of 'data' to be considered for the boxcar.
      offset: int
        The offset between the provided starting point and the actual
        start of the boxcar.
      collision_resolution : 'mean'
        if a sample belonged to multiple output samples, then on
        reverse, how to resolve the value

    :Raises:
      ValueError
        if `boxlength` is lower than 1 or not integer-valued, or if
        `collision_resolution` is not one of
        ``self._COLLISION_RESOLUTIONS``
    """
    Mapper.__init__(self)

    startpoints = N.asanyarray(startpoints)
    # `signedinteger` spells out what the 'i' shortcut used to mean;
    # current NumPy reads 'i' as int32 only, which would send other
    # signed integer arrays through the rounding branch.
    if N.issubdtype(startpoints.dtype, N.signedinteger):
        self.startpoints = startpoints
    else:
        if __debug__:
            debug('MAP', "Boxcar: obtained startpoints are not of int type."
                  " Rounding and changing dtype")
        self.startpoints = N.asanyarray(N.round(startpoints), dtype='i')

    # Sanity checks (call-style raise is valid on Python 2 and 3)
    if boxlength < 1:
        raise ValueError("Boxlength lower than 1 makes no sense.")
    if boxlength - int(boxlength) != 0:
        raise ValueError("boxlength must be an integer value.")

    self.boxlength = int(boxlength)
    self.offset = offset
    self.__selectors = None

    if collision_resolution not in self._COLLISION_RESOLUTIONS:
        # wrap in a 1-tuple so that the message also formats correctly
        # should _COLLISION_RESOLUTIONS be a tuple
        raise ValueError("Unknown method to resolve the collision."
                         " Valid are %s" % (self._COLLISION_RESOLUTIONS,))
    self.__collision_resolution = collision_resolution
def __init__(self, startpoints, boxlength, offset=0, **kwargs):
    """
    Parameters
    ----------
    startpoints : sequence
      Index values along the first axis of 'data'. Values that are not
      of a signed integer type are rounded and converted.
    boxlength : int
      The number of elements after 'startpoint' along the first axis
      of 'data' to be considered for the boxcar.
    offset : int
      The offset between the provided starting point and the actual
      start of the boxcar.
    **kwargs
      Forwarded unchanged to ``Mapper.__init__``.

    Raises
    ------
    ValueError
      If `boxlength` is lower than 1 or not integer-valued.
    """
    Mapper.__init__(self, **kwargs)
    self._outshape = None

    startpoints = np.asanyarray(startpoints)
    # `signedinteger` spells out what the 'i' shortcut used to mean;
    # current NumPy reads 'i' as int32 only, which would send other
    # signed integer arrays through the rounding branch.
    if np.issubdtype(startpoints.dtype, np.signedinteger):
        self.startpoints = startpoints
    else:
        if __debug__:
            debug('MAP', "Boxcar: obtained startpoints are not of int type."
                  " Rounding and changing dtype")
        self.startpoints = np.asanyarray(np.round(startpoints), dtype='i')

    # Sanity checks (call-style raise is valid on Python 2 and 3)
    if boxlength < 1:
        raise ValueError("Boxlength lower than 1 makes no sense.")
    if boxlength - int(boxlength) != 0:
        raise ValueError("boxlength must be an integer value.")

    self.boxlength = int(boxlength)
    self.offset = offset

    # One slice per boxcar. Built from the sanitized (integer) start
    # points and box length: the raw arguments may be float-valued,
    # which would produce slices that cannot be used for indexing.
    self.__selectors = [slice(start + offset,
                              start + offset + self.boxlength)
                        for start in self.startpoints]
def __init__(self, fx=FirstAxisMean):
    """Initialize the mapper.

    Parameters
    ----------
    fx : callable
      Function stored for later use by the mapper; defaults to
      `FirstAxisMean`.
      NOTE(review): presumably applied to each group of samples --
      confirm against the mapping methods of this class.
    """
    # The previous docstring was copied from another mapper and
    # documented parameters (startpoints, boxlength, offset,
    # collision_resolution) that this constructor does not accept.
    Mapper.__init__(self)

    self.__fx = fx

    # not set here; all start out as None
    self.__uniquechunks = None
    self.__uniquelabels = None
    self.__chunks = None
    self.__labels = None
    self.__datashape = None
def __init__(self, params=None, param_est=None, chunks_attr='chunks',
             dtype='float64', inspace=None):
    """
    Parameters
    ----------
    params : None or tuple(mean, std) or dict
      Fixed Z-Scoring parameters (mean, standard deviation). If
      provided, no parameters are estimated from the data. Individual
      parameters for each chunk can be specified by passing a
      dictionary with the chunk ids as keys and the parameter tuples
      as values. If None, parameters will be estimated from the
      training data.
    param_est : None or tuple(attrname, attrvalues)
      Limits the choice of samples used for automatic parameter
      estimation to a specific subset identified by a set of a given
      sample attribute values. The tuple should have the name of that
      sample attribute as the first element, and a sequence of
      attribute values as the second element. If None, all samples
      will be used for parameter estimation.
    chunks_attr : str or None
      If provided, it specifies the name of a samples attribute in the
      training data, unique values of which will be used to identify
      chunks of samples, and to perform individual Z-scoring within
      them.
    dtype : Numpy dtype, optional
      Target dtype that is used for upcasting, in case integer data is
      to be Z-scored.
    inspace : None
      Currently, this argument has no effect.
    """
    Mapper.__init__(self, inspace=inspace)

    self.__chunks_attr = chunks_attr
    self.__params = params
    self.__param_est = param_est
    self.__params_dict = None
    self.__dtype = dtype

    # secret switch to perform in-place z-scoring
    self._secret_inplace_zscore = False
def __init__(self, flow, node_arguments=None, inspace=None):
    """
    Parameters
    ----------
    flow : mdp.Flow instance
      Pristine source flow; a copy of it is made for actual processing
      upon each training attempt.
    node_arguments : tuple, list
      A tuple or a list the same length as the flow. Each item is a
      list of arguments for the training of the corresponding node in
      the flow. If a node does not require additional arguments, None
      can be provided instead. Keyword arguments are currently not
      supported by mdp.Flow.
    inspace : see base class

    Raises
    ------
    ValueError
      If `node_arguments` is given and its length differs from the
      length of `flow`.
    """
    if node_arguments is not None:
        n_args = len(node_arguments)
        n_nodes = len(flow)
        if n_args != n_nodes:
            raise ValueError("Length of node_arguments (%i) does not match the "
                             "number of nodes in the flow (%i)."
                             % (n_args, n_nodes))

    Mapper.__init__(self, inspace=inspace)

    self.__pristine_flow = None
    self.flow = flow
    self.node_arguments = node_arguments
def __init__(self, bands, border=2, baseline_interval=None,
             data_interval=None, Fs=1000.0, *args, **kwargs):
    """Initialize the FrequencyBandPowerMapper

    :Parameters:
      bands: list of 2-tuples
        list of (fl,fh), edge-frequencies for filtering
      border: int
        order of the filter to be used
      baseline_interval: 2-tuple of ints
        indices of the interval to be used for baseline correction
      data_interval: 2-tuple of ints
        indices of the interval to be used for cluster search
      Fs: float
        Both edges of every band are divided by ``Fs / 2`` before they
        are handed to `butter` (presumably the sampling rate, expressed
        in the same unit as the band edges -- confirm with callers).
      args, kwargs:
        additional arguments passed to parent class
    """
    Mapper.__init__(self, *args, **kwargs)
    self._bands = bands
    self._border = border
    self._baseline_interval = baseline_interval
    self._data_interval = data_interval
    self._Fs = Fs
    #TODO: Perform some checks, for valid intervals etc.

    # Precalculate filter parameters (butter).
    # Force true division: with an integer `Fs` and integer band edges
    # the normalized frequencies would otherwise be truncated (to 0) by
    # Python 2 integer division.
    nyquist = float(Fs) / 2
    self._bs = []
    self._as = []
    for band in bands:
        b_coef, a_coef = butter(border,
                                [band[0] / nyquist, band[1] / nyquist],
                                btype="band")
        self._bs.append(b_coef)
        self._as.append(a_coef)
def __init__(self, flow, node_arguments=None, inspace=None):
    """
    Parameters
    ----------
    flow : mdp.Flow instance
      Pristine source flow; a copy of it is made for actual processing
      upon each training attempt.
    node_arguments : tuple, list
      A tuple or a list the same length as the flow. Each item is a
      list of arguments for the training of the corresponding node in
      the flow. If a node does not require additional arguments, None
      can be provided instead. Keyword arguments are currently not
      supported by mdp.Flow.
    inspace : see base class

    Raises
    ------
    ValueError
      If `node_arguments` is given and its length differs from the
      length of `flow`.
    """
    if node_arguments is not None:
        n_args = len(node_arguments)
        n_nodes = len(flow)
        if n_args != n_nodes:
            raise ValueError("Length of node_arguments (%i) does not match the "
                             "number of nodes in the flow (%i)."
                             % (n_args, n_nodes))

    Mapper.__init__(self, inspace=inspace)

    self.__pristine_flow = None
    self.flow = flow
    self.node_arguments = node_arguments
def __init__(self, dim=1, wavelet='sym4', mode='per', maxlevel=None):
    """Initialize _WaveletMapper mapper

    Parameters
    ----------
    dim : int or tuple of int
      dimensions to work across (for now just scalar value, ie 1D
      transformation) is supported
    wavelet : str
      one from the families available within pywt package
    mode : str
      periodization mode
    maxlevel : int or None
      number of levels to use. If None - automatically selected by pywt

    Raises
    ------
    ValueError
      If `wavelet` is not in ``pywt.wavelist()`` or `mode` is not in
      ``pywt.MODES.modes``.
    """
    Mapper.__init__(self)

    self._dim = dim
    """Dimension to work along"""

    self._maxlevel = maxlevel
    """Maximal level of decomposition. None for automatic"""

    # Call-style raise: identical behavior, but also valid on Python 3
    # and consistent with the other mappers.
    if wavelet not in pywt.wavelist():
        raise ValueError(
            "Unknown family of wavelets '%s'. Please use one "
            "available from the list %s" % (wavelet, pywt.wavelist()))
    self._wavelet = wavelet
    """Wavelet family to use"""

    if mode not in pywt.MODES.modes:
        raise ValueError(
            "Unknown periodization mode '%s'. Please use one "
            "available from the list %s" % (mode, pywt.MODES.modes))
    self._mode = mode
    """Periodization mode"""
def __repr__(self):
    """Return the base-class repr with ``shape=...`` as first argument.

    The argument is inserted right after the first opening parenthesis
    of the string returned by ``Mapper.__repr__``.
    """
    base = Mapper.__repr__(self)
    shape_arg = 'shape=' + repr(self.__origshape)
    # only the first "(" (the one opening the argument list) is touched
    return base.replace("(", "(" + shape_arg + ", ", 1)
def __init__(self, slicearg, **kwargs):
    """
    Parameters
    ----------
    slicearg
      Handed to ``self._safe_assign_slicearg()`` after the base class
      has been initialized.
    **kwargs
      Forwarded unchanged to ``Mapper.__init__``.
    """
    Mapper.__init__(self, **kwargs)

    # assignment is delegated to the helper rather than done directly
    self._safe_assign_slicearg(slicearg)
def __init__(self, chunks_attr=None, inspace=None):
    """
    Parameters
    ----------
    chunks_attr
      Stored on the instance for later use; not interpreted here.
    inspace
      Forwarded unchanged to ``Mapper.__init__``.
    """
    Mapper.__init__(self, inspace=inspace)

    self.__chunks_attr = chunks_attr