Example #1
 def fit_transform(self, X, y, trial_index, window, tr):
     from simfMRI.norm import zscore

     Xcs = []
     ycs = []
     ti_cs = []
     uti = unique_nan(trial_index)
     uti = uti[np.logical_not(np.isnan(uti))]
     
     for n, ti in enumerate(uti):
         # Skip last trial to prevent padding overflow
         if n+1 == len(uti):
             break
         
         # Locate trial and either 
         # extend l to window, if needed
         # or shorten each trial to window, if needed
         mask = trial_index == ti
         l = np.sum(mask)
         if l < window:
             # Walk to the last masked sample of the trial, then
             # extend the mask (window - l) samples past it.
             i = 1
             for j, ma in enumerate(mask):
                 if ma:
                     i += 1
                 if i > l:
                     pad = window - l + 1
                     mask[j:(j + pad)] = True
                     break
         elif window < l:
             # Turn off every masked sample past the window-th.
             i = 0
             for j, ma in enumerate(mask):
                 if ma:
                     i += 1
                 if i > window:
                     mask[j] = False
                     
         Xtrial = X[mask,:]
         Xtrial = zscore(Xtrial)
         
         assert Xtrial.shape == (window, X.shape[1]), "Xtrial wrong shape"
         
         if self.mode == 'decompose':
             Xcs.append(self.estimator.fit_transform(Xtrial))
         elif self.mode == 'cluster':
             # Use cluster labels to create average timecourses
             clabels = self.estimator.fit_predict(Xtrial.transpose())
             uclabels = unique_nan(clabels)
             uclabels = sort_nanfirst(uclabels)
             
             Xc = np.zeros((Xtrial.shape[0], len(uclabels))) ## Init w/ 0
             for i, ucl in enumerate(uclabels):
                 Xc[:,i] = Xtrial[:,ucl == clabels].mean(1)
             Xcs.append(Xc)
         else:
             raise ValueError("mode not understood.")
         
         ycs.append(y[trial_index == ti][0])
         ti_cs.append(trial_index[trial_index == ti])
         
     assert len(Xcs) == len(ycs), ("Xcs and ycs mismatch")
     
     return Xcs, np.asarray(ycs), ti_cs 
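The window-equalization step above (pad short trials, truncate long ones) is easy to lose inside the loop. Below is a minimal standalone sketch of the same idea, assuming NumPy only; equalize_window is a hypothetical helper, not part of the original code.

import numpy as np

def equalize_window(mask, window):
    """Return a copy of `mask` adjusted to select exactly `window` samples."""
    mask = mask.copy()
    idx = np.flatnonzero(mask)
    if idx.size < window:
        # Extend past the last True sample; this can spill into the next
        # trial's samples, which is why the loop above skips the final trial.
        extra = window - idx.size
        mask[idx[-1] + 1:idx[-1] + 1 + extra] = True
    elif idx.size > window:
        # Drop every True sample past the window-th.
        mask[idx[window:]] = False
    return mask

mask = np.array([False, True, True, False, False, False])
print(equalize_window(mask, 4))  # [False  True  True  True  True False]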
Example #2
    def _fp(self, X):
        """The cluster workhorse

        Parameters
        ----------
        X : 2D array-like (n_sample, n_feature)
            The data to decompose

        Return
        ------
        Xc : 2D array-like (n_sample, n_clusters)
            The cluster-averaged data
        """

        nrow = X.shape[0]

        clabels = self.estimator.fit_predict(X.transpose())
        uclabels = unique_nan(clabels)
        uclabels = sort_nanfirst(uclabels)

        # Average cluster examples, filling Xc
        Xc = np.zeros((nrow, len(uclabels)))         ## Init w/ 0
        for i, ucl in enumerate(uclabels):
            Xc[:,i] = X[:,ucl == clabels].mean(1)

        assert checkX(Xc)
        assert Xc.shape[0] == X.shape[0], ("After transform wrong row number")
        assert Xc.shape[1] == len(uclabels), ("After transform"
            " wrong col number")

        return Xc
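To see the cluster-averaging step in isolation, here is a sketch on toy data, assuming a scikit-learn KMeans in place of whatever self.estimator is configured with: features sharing a cluster label are collapsed into their mean timecourse.

import numpy as np
from sklearn.cluster import KMeans

X = np.random.rand(20, 6)                  # (n_sample, n_feature)
clabels = KMeans(n_clusters=2, n_init=10).fit_predict(X.T)
Xc = np.column_stack([X[:, clabels == ucl].mean(axis=1)
                      for ucl in np.unique(clabels)])
print(Xc.shape)                            # (20, 2)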
Example #3
    def fit_transform(self, X, y, trial_index, window, tr):
        """Converts X into time-avearage trials and decomposes  that
        matrix, possibly several times depending on y.

        Parameters
        ----------
        X : 2D array-like (n_sample, n_feature)
            The data to decompose

        y : 1D array, None by default
            Sample labels for the data

        trial_index : 1D array (n_sample, )
            Each unique entry should match a trial.

        window : int
            Trial length

        tr : float
            The repetition time of the experiment

        Return
        ------
        Xcs : a list of 2D arrays (n_sample, n_components)
            The components for each unique y.

        csnames : 1D array
            The names of the component matrices

        ti_cs : list of 1D arrays
            A within-trial index for each matrix in Xcs
        """

        selector = fs.SelectPercentile(percentile=20)
        Xsel = selector.fit_transform(X, create_y(y))
        
        Xtrials = []
        Xcs = []
        csnames = []
    
        Xtrial, feature_names = self.avgfn(Xsel, y, trial_index, window, tr)
        unique_fn = sort_nanfirst(unique_nan(feature_names))

        # Split up by feature_names
        for yi in unique_fn:
            Xtrials.append(Xtrial[:, feature_names == yi])
            
        # and decompose.
        if self.mode == 'decompose':
            Xcs = [self._ft(Xt) for Xt in Xtrials]
        elif self.mode == 'cluster':
            Xcs = [self._fp(Xt) for Xt in Xtrials]
        else:
            raise ValueError("mode not understood.")

        ti_cs = [np.arange(xc.shape[0]) for xc in Xcs]
        ## In this case, Xcs[i] is only 1 trial long.
    
        return Xcs, unique_fn, ti_cs
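The feature-selection step at the top of this method is standard scikit-learn; a minimal sketch with synthetic data (SelectPercentile defaults to ANOVA F-test scoring):

import numpy as np
from sklearn import feature_selection as fs

X = np.random.rand(100, 50)
y = np.random.randint(0, 2, 100)
selector = fs.SelectPercentile(percentile=20)
Xsel = selector.fit_transform(X, y)
print(Xsel.shape)  # (100, 10): the top 20% of 50 features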
Example #4
    def fit_transform(self, X, y, trial_index, window, tr):
        if self.mode == 'decompose':
            Xc = self._ft(X)
        elif self.mode == 'cluster':
            Xc = self._fp(X)  # _fp transposes internally before clustering
        else:
            raise ValueError("mode not understood.")
            
        unique_y = sort_nanfirst(unique_nan(y))        
        Xcs = [Xc[y == uy,:] for uy in unique_y]        
        ti_cs = [trial_index[y == uy] for uy in unique_y]        

        return Xcs, unique_y, ti_cs
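The label-wise split at the end of this method recurs in Examples #4-#6; a toy check of the pattern (the names here are hypothetical):

import numpy as np

Xc = np.arange(12).reshape(6, 2)
y = np.array([0, 0, 1, 1, 1, 0])
unique_y = np.unique(y)
Xcs = [Xc[y == uy, :] for uy in unique_y]
print([x.shape for x in Xcs])  # [(3, 2), (3, 2)]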
Example #5
 def fit_transform(self, X, y, trial_index, window, tr):        
     selector = fs.SelectPercentile(percentile=25)
     Xsel = selector.fit_transform(X, create_y(y))
     
     if self.mode == 'decompose':
         Xc = self._ft(Xsel)
     elif self.mode == 'cluster':
         Xc = self._fp(Xsel)
     else:
         raise ValueError("mode not understood.")
     
     unique_y = sort_nanfirst(unique_nan(y))        
     Xcs = [Xc[y == uy,:] for uy in unique_y]
     ti_cs = [trial_index[y == uy] for uy in unique_y]
     
     return Xcs, unique_y, ti_cs
Example #6
    def fit_transform(self, X, y, trial_index, window, tr):
        """Average X by trial based on y (and trial_index).

        Parameters
        ----------
        X : 2D array-like (n_sample, n_feature)
            The data to decompose

        y : 1D array, None by default
            Sample labels for the data

        trial_index : 1D array (n_sample, )
            Each unique entry should match a trial

        window : int
            A dummy argument

        tr : float
            A dummy argument

        Return
        ------
        Xcs : a list of 2D arrays
            Xc split by unique y

        unique_y : 1D array
            The sorted unique values of y

        ti_cs : list of 1D arrays
            trial_index split by unique y
        """

        Xa = X.mean(1)[:,np.newaxis]
        
        if self.mode == 'decompose':
            Xc = self._ft(Xa)
        elif self.mode == 'cluster':
            Xc = self._fp(Xa)
        else:
            raise ValueError("mode not understood.")
        
        unique_y = sort_nanfirst(unique_nan(y))        
        Xcs = [Xc[y == uy,:] for uy in unique_y]
        ti_cs = [trial_index[y == uy] for uy in unique_y]

        return Xcs, unique_y, ti_cs
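The spatial average above collapses all features into one mean timecourse while keeping a 2D shape, so the downstream _ft/_fp calls still see (n_sample, n_feature) input; a toy check:

import numpy as np

X = np.random.rand(8, 5)
Xa = X.mean(axis=1)[:, np.newaxis]
print(Xa.shape)  # (8, 1)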
Example #7
    def run(self, basename, cond, index, wheelerdata, cond_to_rt, 
        smooth=False,
        filtfile=None, TR=2, trname="TR", 
        n_features=10, n_univariate=None, n_accumulator=None, n_decision=None, 
        n_noise=None, drift_noise=False, step_noise=False, z_noise=False,
        drift_noise_param=None, step_noise_param=None, z_noise_param=None,
        noise_f=white, hrf_f=None, hrf_params=None, prng=None):       
        """Reproduce the cond from the wheelerdata experiment
        
        Parameters
        ----------
        basename : str
            The name for the reproduced datafile; it will be suffixed
            with each cond and scode, plus .csv
            (i.e. `'{0}_{1}_{2}.csv'.format(basename, cond, scode)`).
        cond : str
            A condition name found in the wheelerdata objects metadata
        index : str
            A name of a trial index found in the wheelerdata object metadata
        wheelerdata : object, instance of Wheelerdata
            A Wheelerdata object
        cond_to_rt : dict
            A map of cond (key) to reaction time (item, (int, float))
        smooth : boolean, optional
            Do bandpass filtering (default False)
        filtfile : str, None
            A name of json file designed for reprocessing Wheelerdata metadata
        TR : float, int
            The repetition time of the experiment
        trname : str
            The name of the index of TRs in the metadata
        n_features : int
            The total number of features (the other n_* arguments
            must sum to this value)
        n_univariate : int
            The number of univariate (boxcar) features
        n_accumulator : int
            The number of accumulator features
        n_decision : int
            The number of decision features
        n_noise : int
            The number of noise features
        drift_noise : boolean, optional
            Add noise to the drift rate of the accumulator features
        step_noise : boolean, optional
            Add noise to each step of the accumulator features
        z_noise : boolean, optional
            Add noise to the start value of accumulator features
        drift_noise_param : None or dict, optional
            Parameters for drift_noise which is drawn from a
            Gaussian distribution. None defaults to: 
            `{"loc": 0, "scale" : 0.5}`
        step_noise_param : None or dict, optional
            Parameters for step_noise which is drawn from a 
            Gaussian distribution. None defaults to:
            `{"loc" : 0, "scale" : 0.2, "size" : 1}`
        z_noise_param : None or dict, optional
            Parameters for z_noise which is drawn from the uniform
            distribution. None defaults to:
            `{"low" : 0.01, "high" : 0.5, "size" : 1}`
        noise_f : function, optional
            Produces noise, must have signatures like `noise, prng = f(N, prng)`
        hrf_f : function, optional
            Returns a haemodynamic response, signature hrf_f(**hrf_params)
        hrf_params : dict
            Keyword parameters for hrf_f
        prng : None or RandomState object
            Allows for independent random draws, used for all 
            random sampling
        """

        mode = 'w'
        header = True

        # All *s lists correspond to wheelerdata.scodes
        scodes = self.data.scodes
        Xs, ys, yindices = make_bold_re(
                cond, index, self.data,
                cond_to_rt,
                filtfile=filtfile, 
                trname=trname,
                noise_f=noise_f, 
                hrf_f=hrf_f, 
                hrf_params=hrf_params, 
                n_features=n_features, 
                n_univariate=n_univariate, 
                n_accumulator=n_accumulator, 
                n_decision=n_decision, 
                n_noise=n_noise, 
                drift_noise=drift_noise, 
                step_noise=step_noise, 
                z_noise=z_noise,
                drift_noise_param=drift_noise_param, 
                step_noise_param=step_noise_param, 
                z_noise_param=z_noise_param,
                prng=prng)
        
        for scode, X, y, yindex in zip(scodes, Xs, ys, yindices):
            if smooth:
                X = smoothfn(X, tr=1.5, ub=0.10, lb=0.001)
            
            # Normalize
            norm = MinMaxScaler((0,1))
            X = norm.fit_transform(X.astype(float))
            
            Xcs, csnames, ti_cs = self.spacetime.fit_transform(
                    X, y, yindex, self.window, self.tr)

            # Name them,
            csnames = unique_nan(y)
            csnames = sort_nanfirst(csnames)

            # and write.
            for Xc, csname, ti in zip(Xcs, csnames, ti_cs):
                save_tcdf(
                        name=join_by_underscore(True, basename, csname), 
                        X=Xc, 
                        cond=csname,
                        dataname=join_by_underscore(False, 
                                os.path.split(basename)[-1], scode),
                        index=ti.astype(int),
                        header=header, 
                        mode=mode,
                        float_format="%.{0}f".format(self.nsig))
            
            # After subject 1, switch to append mode
            mode = 'a'
            header = False
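The mode/header bookkeeping above writes one CSV across all subjects: write with a header for the first subject, then append without one. A sketch of the same pattern with plain pandas (save_tcdf is project-specific; 'out.csv' and the columns here are hypothetical):

import pandas as pd

mode, header = 'w', True
for scode in range(3):
    df = pd.DataFrame({'scode': [scode], 'value': [scode * 2]})
    df.to_csv('out.csv', mode=mode, header=header, index=False)
    mode, header = 'a', False  # append after the first subject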
Example #8
    def run(self, basename, smooth=False, filtfile=None, 
        n=None, tr=None, n_rt=None, n_trials_per_cond=None,
        durations=None, noise=None, n_features=None, n_univariate=None,
        n_accumulator=None, n_decision=None, n_noise=None, 
        n_repeated=None, drift_noise=False, step_noise=False):
        
        # Write init
        mode = 'w'
        header = True

        for scode in range(n):
            # If we're past the first subject's data, append.
            if scode > 0:
                mode = 'a'
                header = False

            # Create the data
            X, y, y_trialcount = make_bold(
                    n_rt, 
                    n_trials_per_cond, 
                    tr, 
                    durations=durations, 
                    noise=noise, 
                    n_features=n_features, 
                    n_univariate=n_univariate, 
                    n_accumulator=n_accumulator, 
                    n_decision=n_decision,
                    n_noise=n_noise,
                    n_repeated=n_repeated,
                    drift_noise=drift_noise,
                    step_noise=step_noise)

            targets = construct_targets(trial_index=y_trialcount, y=y)

            # Drop baseline trials created by make_bold
            baselinemask = np.arange(y.shape[0])[y != 0]
            X = X[baselinemask, ]
            targets = filter_targets(baselinemask, targets)

            # Filter and
            if filtfile is not None:
                X, targets = filterX(filtfile, X, targets)
            if smooth:
                X = smoothfn(X, tr=1.5, ub=0.10, lb=0.001)
            
            # Normalize
            norm = MinMaxScaler((0,1))
            X = norm.fit_transform(X.astype(float))
            
            # finally decompose.
            Xcs, csnames, ti_cs = self.spacetime.fit_transform(
                    X, targets["y"], targets["trial_index"],
                    self.window, tr)
            
            # Name them,
            csnames = unique_nan(y)
            csnames = sort_nanfirst(csnames)

            # and write.
            for Xc, csname, ti in zip(Xcs, csnames, ti_cs):
                save_tcdf(
                        name=join_by_underscore(True, basename, csname), 
                        X=Xc, 
                        cond=csname,
                        dataname=join_by_underscore(False, 
                                os.path.split(basename)[-1], scode),
                        index=ti.astype(int),
                        header=header, 
                        mode=mode,
                        float_format="%.{0}f".format(self.nsig))
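The baseline drop in run() above keeps only rows whose label is nonzero; note that despite its name, baselinemask holds integer indices, not booleans. A toy check of the pattern:

import numpy as np

y = np.array([0, 1, 0, 2, 1])
X = np.arange(10).reshape(5, 2)
keep = np.arange(y.shape[0])[y != 0]  # integer indices
print(X[keep, ])                      # rows 1, 3, and 4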
def make_bold(cond,
              index,
              wheelerdata,
              cond_to_rt,
              filtfile=None,
              TR=2,
              trname="TR",
              n_features=10,
              n_univariate=None,
              n_accumulator=None,
              n_decision=None,
              n_noise=None,
              drift_noise=False,
              step_noise=False,
              z_noise=False,
              drift_noise_param=None,
              step_noise_param=None,
              z_noise_param=None,
              noise_f=white,
              hrf_f=None,
              hrf_params=None,
              prng=None):
    """Make BOLD timecourse features based on Wheelerdata

    Parameters
    ----------
    cond : str
        A condition name found in the wheelerdata objects metadata
    index : str
        A name of a trial index found in the wheelerdata object metadata
    wheelerdata : object, instance of Wheelerdata
        A Wheelerdata object
    cond_to_rt : dict
        A map of cond (key) to reaction time (item, (int, float))
    filtfile : str, None
        A name of json file designed for reprocessing Wheelerdata metadata
    TR : float, int
        The repetition time of the experiment
    trname : str
        The name of the index of TRs in the metadata
    n_features : int
        The total number of features (the other n_* arguments
        must sum to this value)
    n_univariate : int
        The number of univariate (boxcar) features
    n_accumulator : int
        The number of accumulator features
    n_decision : int
        The number of decision features
    n_noise : int
        The number of noise features
    drift_noise : boolean, optional
        Add noise to the drift rate of the accumulator features
    step_noise : boolean, optional
        Add noise to each step of the accumulator features
    z_noise : boolean, optional
        Add noise to the start value of accumulator features
    drift_noise_param : None or dict, optional
        Parameters for drift_noise which is drawn from a
        Gaussian distribution. None defaults to: 
        `{"loc": 0, "scale" : 0.5}`
    step_noise_param : None or dict, optional
        Parameters for step_noise which is drawn from a 
        Gaussian distribution. None defaults to:
        `{"loc" : 0, "scale" : 0.2, "size" : 1}`
    z_noise_param : None or dict, optional
        Parameters for z_noise which is drawn from the uniform
        distribution. None defaults to:
        `{"low" : 0.01, "high" : 0.5, "size" : 1}`
    noise_f : function, optional
        Produces noise, must have signatures like `noise, prng = f(N, prng)`
    hrf_f : function, optional
        Returns a haemodynamic response, signature hrf_f(**hrf_params)
    hrf_params : dict
        Keyword parameters for hrf_f
    prng : None or RandomState object
        Allows for independent random draws, used for all
        random sampling

    Return
    ------
    Xs : list of 2D arrays
        The simulated BOLD features, one entry per metadata file

    ys : list of 1D arrays
        The sample labels for each entry in Xs

    yindices : list of 1D arrays
        The trial index for each entry in Xs
    """

    # ----
    # Feature composition
    if n_noise is None:
        n_noise = 0
    if n_accumulator is None:
        n_accumulator = 0
    if n_decision is None:
        n_decision = 0
    if n_univariate is None:
        n_univariate = (n_features - n_noise - n_accumulator - n_decision)

    if (n_features - n_univariate - n_accumulator - n_noise - n_decision) != 0:
        raise ValueError("The feature counts don't add up.")

    # Load wheelerdata
    metas = wheelerdata.get_RT_metadata_paths()

    # Get to work simulating
    Xs, ys, yindices = [], [], []
    for meta in metas:
        # Get data, preprocess too,
        data = csv_to_targets(meta)
        data = tr_pad_targets(data, trname, data[trname].shape[0], pad=np.nan)

        if filtfile is not None:
            data = reprocess_targets(filtfile, data, np.nan,
                                     ("TR", "trialcount"))

        # Check cond_to_rt
        for c in unique_nan(data[cond]):
            try:
                cond_to_rt[c]
            except KeyError:
                raise KeyError("{0} not present in cond_to_rt".format(c))

        # use cond to create y
        y = create_y(data[cond])
        yindex = data[index]

        # make accumulator and decision traces
        if n_accumulator > 0:
            data["accumulator"] = _make_accumulator_array(y,
                                                          yindex,
                                                          cond_to_rt,
                                                          drift_noise,
                                                          step_noise,
                                                          z_noise,
                                                          drift_noise_param,
                                                          step_noise_param,
                                                          z_noise_param,
                                                          prng=prng)
        if n_decision > 0:
            data["decision"] = _make_decision_array(y, yindex, cond_to_rt)

        # Populate Xmeta
        boldsim = Reproduce(y,
                            data,
                            noise_f=noise_f,
                            hrf_f=hrf_f,
                            hrf_params=hrf_params,
                            TR=TR,
                            prng=prng)
        boldsim.create_dm_from_y(convolve=False)

        n_sample_feature = boldsim.dm.shape[0]
        Xmeta = np.zeros((n_sample_feature, n_features))

        # 1. univariate features
        start = 0
        stop = n_univariate
        for j in range(start, stop):
            boldsim.create_bold(np.sum(boldsim.dm[:, 1:], axis=1),
                                convolve=True)
            Xmeta[:, j] = boldsim.bold

        # 2. accumulator features
        start = stop
        stop = start + n_accumulator
        for j in range(start, stop):
            boldsim.create_bold(data["accumulator"], convolve=True)
            Xmeta[:, j] = boldsim.bold

        # 3. decision features
        start = stop
        stop = start + n_decision
        for j in range(start, stop):
            boldsim.create_bold(data["decision"], convolve=True)
            Xmeta[:, j] = boldsim.bold

        # 4. noise features:
        start = stop
        stop = start + n_noise
        for j in range(start, stop):
            # Drop baseline from noise
            randbold = rand(boldsim.dm.shape[0])
            randbold[boldsim.y == 0] = 0.0
            boldsim.create_bold(randbold, convolve=True)
            Xmeta[:, j] = boldsim.bold

        Xs.append(Xmeta)
        ys.append(y)
        yindices.append(yindex)

    return Xs, ys, yindices
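The four feature blocks above are filled with a running start/stop index over consecutive column ranges; reduced to a toy example (the generators here are stand-ins for the simulator, not its API):

import numpy as np

n_sample, n_univariate, n_noise = 10, 3, 2
X = np.zeros((n_sample, n_univariate + n_noise))

start, stop = 0, n_univariate
for j in range(start, stop):
    X[:, j] = np.ones(n_sample)          # stand-in for a boxcar feature

start, stop = stop, stop + n_noise
for j in range(start, stop):
    X[:, j] = np.random.rand(n_sample)   # stand-in for a noise feature

print(X.shape)  # (10, 5)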