def fit_transform(self, X, y, trial_index, window, tr):
    """Decompose (or cluster) every trial in X on its own.

    Each trial but the last is located through trial_index, padded or
    truncated to exactly `window` samples, z-scored, and then handed to
    self.estimator.

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to decompose
    y : 1D array (n_sample, )
        Sample labels for the data
    trial_index : 1D array (n_sample, )
        Each unique (non-NaN) entry should match a trial.
    window : int
        Trial length, in samples
    tr : unused
        Kept so all fit_transform variants share one signature

    Return
    ------
    Xcs : list of 2D arrays
        One transformed matrix per processed trial
    ycs : 1D array
        The first label of each processed trial
    ti_cs : list of 1D arrays
        The trial_index entries of each processed trial (original,
        un-padded, un-truncated length)

    Raises
    ------
    ValueError
        If self.mode is neither 'decompose' nor 'cluster'
    """
    # Function-scope import, done once instead of once per trial
    from simfMRI.norm import zscore

    Xcs = []
    ycs = []
    ti_cs = []  # Was never initialised, causing a NameError on first use

    uti = unique_nan(trial_index)
    uti = uti[np.logical_not(np.isnan(uti))]

    # Skip last trial to prevent padding overflow
    for ti in uti[:-1]:
        trial_mask = trial_index == ti

        # Work on a copy, trial_mask is needed unchanged for y and ti_cs
        mask = trial_mask.copy()
        in_trial = np.flatnonzero(mask)
        n_in_trial = in_trial.size

        if n_in_trial < window:
            # Extend the trial: mark the samples following its last
            # member until `window` samples are selected in total
            last = in_trial[-1]
            pad = window - n_in_trial + 1
            mask[last:(last + pad)] = True
        elif window < n_in_trial:
            # Shorten the trial: drop everything past the first `window`
            mask[in_trial[window:]] = False

        Xtrial = zscore(X[mask, :])
        assert Xtrial.shape == (window, X.shape[1]), "Xtrial wrong shape"

        if self.mode == 'decompose':
            Xcs.append(self.estimator.fit_transform(Xtrial))
        elif self.mode == 'cluster':
            # Use cluster labels to create average timecourses
            clabels = self.estimator.fit_predict(Xtrial.transpose())
            uclabels = sort_nanfirst(unique_nan(clabels))

            Xc = np.zeros((Xtrial.shape[0], len(uclabels)))  ## Init w/ 0
            for i, ucl in enumerate(uclabels):
                Xc[:, i] = Xtrial[:, ucl == clabels].mean(1)
            Xcs.append(Xc)
        else:
            raise ValueError("mode not understood.")

        ycs.append(y[trial_mask][0])
        ti_cs.append(trial_index[trial_mask])

    assert len(Xcs) == len(ycs), ("Xcs and ycs mismatch")

    return Xcs, np.asarray(ycs), ti_cs
def _fp(self, X):
    """Cluster the features of X, then average within each cluster.

    The estimator labels the columns of X (it is fit on X transposed);
    all columns sharing a label are averaged into one column of the
    result.

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to cluster

    Return
    ------
    Xc : 2D array (n_sample, n_clusters)
        One averaged column per unique cluster label, ordered by
        sort_nanfirst
    """
    n_samples = X.shape[0]

    labels = self.estimator.fit_predict(X.transpose())
    ordered_labels = sort_nanfirst(unique_nan(labels))
    n_clusters = len(ordered_labels)

    # Start from zeros and fill in one cluster average per column
    Xc = np.zeros((n_samples, n_clusters))
    for col, label in enumerate(ordered_labels):
        members = label == labels
        Xc[:, col] = X[:, members].mean(1)

    # Sanity checks on the result
    assert checkX(Xc)
    assert Xc.shape[0] == X.shape[0], "After transform wrong row number"
    assert Xc.shape[1] == len(ordered_labels), "Afer transform wrong col number"

    return Xc
def fit_transform(self, X, y, trial_index, window, tr):
    """Select features, average X into trials, then decompose each
    group of averaged features.

    The top 20 percent of features are kept (SelectPercentile scored
    against create_y(y)), self.avgfn turns them into a trial-level
    matrix with one name per column, and the columns are grouped by
    name before being decomposed or clustered.

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to decompose
    y : 1D array
        Sample labels for the data
    trial_index : 1D array (n_sample, )
        Each unique entry should match a trial.
    window : int
        Trial length
    tr : passed through to self.avgfn

    Return
    ------
    Xcs : a list of 2D arrays
        The components for each unique feature name
    unique_fn : 1D array
        The unique feature names, in sort_nanfirst order
    ti_cs : a list of 1D arrays
        A 0..n_row-1 index for each entry of Xcs

    Raises
    ------
    ValueError
        If self.mode is neither 'decompose' nor 'cluster'
    """
    Xsel = fs.SelectPercentile(percentile=20).fit_transform(X, create_y(y))

    Xtrial, feature_names = self.avgfn(Xsel, y, trial_index, window, tr)
    unique_fn = sort_nanfirst(unique_nan(feature_names))

    # Split up by feature_names
    Xtrials = [Xtrial[:, feature_names == name] for name in unique_fn]

    # and decompose.
    if self.mode == 'decompose':
        transform = self._ft
    elif self.mode == 'cluster':
        transform = self._fp
    else:
        raise ValueError("mode not understood.")
    Xcs = [transform(Xt) for Xt in Xtrials]

    ## In this case, Xcs[i] is only 1 trial long.
    ti_cs = [np.arange(comp.shape[0]) for comp in Xcs]

    return Xcs, unique_fn, ti_cs
def fit_transform(self, X, y, trial_index, window, tr):
    """Transform all of X at once, then split the result by label.

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to decompose
    y : 1D array
        Sample labels, used to split the transformed rows
    trial_index : 1D array
        Split by y in the same way as the transformed rows
    window : unused
    tr : unused

    Return
    ------
    Xcs : a list of 2D arrays
        The rows of the transformed data for each unique y
    unique_y : 1D array
        The unique labels, in sort_nanfirst order
    ti_cs : a list of 1D arrays
        The trial_index entries for each unique y

    Raises
    ------
    ValueError
        If self.mode is neither 'decompose' nor 'cluster'
    """
    if self.mode == 'decompose':
        Xc = self._ft(X)
    elif self.mode == 'cluster':
        # NOTE(review): X is transposed before being handed to _fp in
        # this variant only -- confirm that is intended.
        Xc = self._fp(X.transpose())
    else:
        raise ValueError("mode not understood.")

    unique_y = sort_nanfirst(unique_nan(y))

    # One boolean row-selector per label, shared by both outputs
    selectors = [y == label for label in unique_y]
    Xcs = [Xc[rows, :] for rows in selectors]
    ti_cs = [trial_index[rows] for rows in selectors]

    return Xcs, unique_y, ti_cs
def fit_transform(self, X, y, trial_index, window, tr):
    """Select features, transform them, then split the result by label.

    The top 25 percent of features are kept (SelectPercentile scored
    against create_y(y)) before decomposing or clustering.

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to decompose
    y : 1D array
        Sample labels, used for feature selection and to split the
        transformed rows
    trial_index : 1D array
        Split by y in the same way as the transformed rows
    window : unused
    tr : unused

    Return
    ------
    Xcs : a list of 2D arrays
        The rows of the transformed data for each unique y
    unique_y : 1D array
        The unique labels, in sort_nanfirst order
    ti_cs : a list of 1D arrays
        The trial_index entries for each unique y

    Raises
    ------
    ValueError
        If self.mode is neither 'decompose' nor 'cluster'
    """
    selector = fs.SelectPercentile(percentile=25)
    Xsel = selector.fit_transform(X, create_y(y))

    # A leftover ipdb.set_trace() breakpoint used to sit here and halted
    # every call; it has been removed.
    if self.mode == 'decompose':
        Xc = self._ft(Xsel)
    elif self.mode == 'cluster':
        Xc = self._fp(Xsel)
    else:
        raise ValueError("mode not understood.")

    unique_y = sort_nanfirst(unique_nan(y))
    Xcs = [Xc[y == uy, :] for uy in unique_y]
    ti_cs = [trial_index[y == uy] for uy in unique_y]

    return Xcs, unique_y, ti_cs
def fit_transform(self, X, y, trial_index, window, tr):
    """Collapse X to its mean over axis 1, transform that single
    column, then split the result by label.

    Parameters
    ----------
    X : 2D array-like (n_sample, n_feature)
        The data to decompose
    y : 1D array
        Sample labels, used to split the transformed rows
    trial_index : 1D array
        Split by y in the same way as the transformed rows
    window : unused
    tr : unused

    Return
    ------
    Xcs : a list of 2D arrays
        The rows of the transformed data for each unique y
    unique_y : 1D array
        The unique labels, in sort_nanfirst order
    ti_cs : a list of 1D arrays
        The trial_index entries for each unique y

    Raises
    ------
    ValueError
        If self.mode is neither 'decompose' nor 'cluster'
    """
    # Mean over axis 1, kept as a column so it is still 2D
    row_means = X.mean(1)
    Xa = row_means[:, None]

    if self.mode == 'decompose':
        Xc = self._ft(Xa)
    elif self.mode == 'cluster':
        Xc = self._fp(Xa)
    else:
        raise ValueError("mode not understood.")

    unique_y = sort_nanfirst(unique_nan(y))

    # One boolean row-selector per label, shared by both outputs
    selectors = [y == label for label in unique_y]
    Xcs = [Xc[rows, :] for rows in selectors]
    ti_cs = [trial_index[rows] for rows in selectors]

    return Xcs, unique_y, ti_cs
def run(self, basename, cond, index, wheelerdata, cond_to_rt,
        smooth=False, filtfile=None, TR=2, trname="TR",
        n_features=10, n_univariate=None, n_accumulator=None,
        n_decision=None, n_noise=None, drift_noise=False,
        step_noise=False, z_noise=False, drift_noise_param=None,
        step_noise_param=None, z_noise_param=None, noise_f=white,
        hrf_f=None, hrf_params=None, prng=None):
    """Reproduce the cond from the wheelerdata experiment

    NOTE(review): the `wheelerdata` and `TR` arguments are not used by
    the body; the data comes from self.data and TR is not forwarded to
    make_bold_re -- confirm whether that is intended.

    Parameters
    ---------
    basename : str
        The name for the Reproduced datafile, will be suffixed
        by each cond and scode and .csv
        (i.e. `'{0}_{1}_{2}.csv'.format(basename, cond, scode)`).
    cond : str
        A condition name found in the wheelerdata objects metadata
    index : str
        A name of a trial index found in the wheelerdata object metadata
    wheelerdata : object, instance of Wheelerdata
        A Wheelerdata object
    cond_to_rt: dict
        A map of cond (key) to reaction time (item, (int, float))
    smooth : boolean, optional
        Do bandpass filtering (default False)
    filtfile : str, None
        A name of json file designed for reprocessing Wheelerdata metadata
    TR : float, int
        The repetition time of the experiment
    trname : str
        The name of the index of TRs in the metadata
    n_features : int
        The number of features in total (other n_* arguments
        must sum to this value)
    n_univariate : int
        The number of univariate (boxcar) features
    n_accumulator : int
        The number of accumulator features
    n_decision : int
        The number of decision features
    n_noise : int
        The number of noise features
    drift_noise : boolean, optional
        Add noise to the drift rate of the accumulator features
    step_noise : boolean, optional
        Add noise to each step of the accumulator features
    z_noise : boolean, optional
        Add noise to the start value of accumulator features
    drift_noise_param : None or dict, optional
        Parameters for drift_noise which is drawn from a
        Gaussian distribution. None defaults to:
        `{"loc": 0, "scale" : 0.5}`
    step_noise_param : None or dict, optional
        Parameters for step_noise which is drawn from a
        Gaussian distribution. None defaults to:
        `{"loc" : 0, "scale" : 0.2, "size" : 1}`
    z_noise_param : None or dict, optional
        Parameters for z_noise which is drawn from the uniform
        distribution. None defaults to:
        `{"low" : 0.01, "high" : 0.5, "size" : 1}`
    noise_f : function, optional
        Produces noise, must have signatures like `noise, prng = f(N, prng)`
    hrf_f : function, optional
        Returns a haemodynamic response, signature hrf_f(**hrf_params)
    hrf_params : dict
        Keyword parameters for hrf_f
    prng : None or RandomState object
        Allows for independent random draws, used for all
        random sampling
    """
    mode = 'w'
    header = True

    # All *s lists correspond to wheelerdata.scodes
    scodes = self.data.scodes
    Xs, ys, yindices = make_bold_re(
            cond, index, self.data, cond_to_rt,
            filtfile=filtfile,
            trname=trname,
            noise_f=noise_f,
            hrf_f=hrf_f,
            hrf_params=hrf_params,
            n_features=n_features,
            n_univariate=n_univariate,
            n_accumulator=n_accumulator,
            n_decision=n_decision,
            n_noise=n_noise,
            drift_noise=drift_noise,
            step_noise=step_noise,
            z_noise=z_noise,
            drift_noise_param=drift_noise_param,
            step_noise_param=step_noise_param,
            z_noise_param=z_noise_param,
            prng=prng)

    for scode, X, y, yindex in zip(scodes, Xs, ys, yindices):
        if smooth:
            X = smoothfn(X, tr=1.5, ub=0.10, lb=0.001)

        # Normalize
        # (builtin float: the np.float alias was removed from NumPy)
        norm = MinMaxScaler((0,1))
        X = norm.fit_transform(X.astype(float))

        Xcs, csnames, ti_cs = self.spacetime.fit_transform(
                X, y, yindex, self.window, self.tr)

        # Name them,
        csnames = unique_nan(y)
        csnames = sort_nanfirst(csnames)

        # and write.
        for Xc, csname, ti in zip(Xcs, csnames, ti_cs):
            save_tcdf(
                    name=join_by_underscore(True, basename, csname),
                    X=Xc,
                    cond=csname,
                    dataname=join_by_underscore(
                            False, os.path.split(basename)[-1], scode),
                    # builtin int: the np.int alias was removed from NumPy
                    index=ti.astype(int),
                    header=header,
                    mode=mode,
                    float_format="%.{0}f".format(self.nsig))

        # After s 1 go to append mode
        mode = 'a'
        header = False
def run(self, basename, smooth=False, filtfile=None,
        n=None, tr=None, n_rt=None, n_trials_per_cond=None,
        durations=None, noise=None, n_features=None, n_univariate=None,
        n_accumulator=None, n_decision=None, n_noise=None,
        n_repeated=None, drift_noise=False, step_noise=False):
    """Simulate n datasets with make_bold, decompose each, and write
    the components out with save_tcdf.

    Parameters
    ----------
    basename : str
        Base name for the written files; each component name is joined
        onto it with join_by_underscore
    smooth : boolean, optional
        Pass X through smoothfn before normalizing (default False)
    filtfile : str, None
        If not None, X and the targets are passed through filterX
    n : int
        The number of datasets to simulate; used as range(n)
    tr : passed to make_bold and to self.spacetime.fit_transform
    n_rt, n_trials_per_cond, durations, noise, n_features, n_univariate,
    n_accumulator, n_decision, n_noise, n_repeated, drift_noise,
    step_noise :
        Passed through to make_bold unchanged
    """
    # Write init
    mode = 'w'
    header = True

    for scode in range(n):
        # If were past the first Ss data, append.
        if scode > 0:
            mode = 'a'
            header = False

        # Create the data
        X, y, y_trialcount = make_bold(
                n_rt,
                n_trials_per_cond,
                tr,
                durations=durations,
                noise=noise,
                n_features=n_features,
                n_univariate=n_univariate,
                n_accumulator=n_accumulator,
                n_decision=n_decision,
                n_noise=n_noise,
                n_repeated=n_repeated,
                drift_noise=drift_noise,
                step_noise=step_noise)

        targets = construct_targets(trial_index=y_trialcount, y=y)

        # Drop baseline trials created by make_bold
        baselinemask = np.arange(y.shape[0])[y != 0]
        X = X[baselinemask, ]
        targets = filter_targets(baselinemask, targets)

        # Filter and
        if filtfile is not None:
            X, targets = filterX(filtfile, X, targets)
        if smooth:
            X = smoothfn(X, tr=1.5, ub=0.10, lb=0.001)

        # Normalize
        # (builtin float: the np.float alias was removed from NumPy)
        norm = MinMaxScaler((0,1))
        X = norm.fit_transform(X.astype(float))

        # finally decompose.
        # fit_transform takes tr as its fifth positional argument; it
        # was missing here, which raised a TypeError.
        Xcs, csnames, ti_cs = self.spacetime.fit_transform(
                X, targets["y"], targets["trial_index"],
                self.window, tr)

        # Name them, using the same (baseline-free, filtered) labels
        # that were decomposed. The unfiltered y still holds the
        # baseline label, which shifted every name by one.
        csnames = unique_nan(targets["y"])
        csnames = sort_nanfirst(csnames)

        # and write.
        for Xc, csname, ti in zip(Xcs, csnames, ti_cs):
            save_tcdf(
                    name=join_by_underscore(True, basename, csname),
                    X=Xc,
                    cond=csname,
                    dataname=join_by_underscore(
                            False, os.path.split(basename)[-1], scode),
                    # builtin int: the np.int alias was removed from NumPy
                    index=ti.astype(int),
                    header=header,
                    mode=mode,
                    float_format="%.{0}f".format(self.nsig))
def make_bold(cond, index, wheelerdata, cond_to_rt, filtfile=None,
              TR=2, trname="TR", n_features=10, n_univariate=None,
              n_accumulator=None, n_decision=None, n_noise=None,
              drift_noise=False, step_noise=False, z_noise=False,
              drift_noise_param=None, step_noise_param=None,
              z_noise_param=None, noise_f=white, hrf_f=None,
              hrf_params=None, prng=None):
    """Make BOLD timecourse features based on Wheelerdata

    Parameters
    ---------
    cond : str
        A condition name found in the wheelerdata objects metadata
    index : str
        A name of a trial index found in the wheelerdata object metadata
    wheelerdata : object, instance of Wheelerdata
        A Wheelerdata object
    cond_to_rt: dict
        A map of cond (key) to reaction time (item, (int, float))
    filtfile : str, None
        A name of json file designed for reprocessing Wheelerdata metadata
    TR : float, int
        The repetition time of the experiment
    trname : str
        The name of the index of TRs in the metadata
    n_features : int
        The number of features in total (other n_* arguments
        must sum to this value)
    n_univariate : int
        The number of univariate (boxcar) features. None means
        whatever is left of n_features after the other n_* counts.
    n_accumulator : int
        The number of accumulator features (None means 0)
    n_decision : int
        The number of decision features (None means 0)
    n_noise : int
        The number of noise features (None means 0)
    drift_noise : boolean, optional
        Add noise to the drift rate of the accumulator features
    step_noise : boolean, optional
        Add noise to each step of the accumulator features
    z_noise : boolean, optional
        Add noise to the start value of accumulator features
    drift_noise_param : None or dict, optional
        Parameters for drift_noise which is drawn from a
        Gaussian distribution. None defaults to:
        `{"loc": 0, "scale" : 0.5}`
    step_noise_param : None or dict, optional
        Parameters for step_noise which is drawn from a
        Gaussian distribution. None defaults to:
        `{"loc" : 0, "scale" : 0.2, "size" : 1}`
    z_noise_param : None or dict, optional
        Parameters for z_noise which is drawn from the uniform
        distribution. None defaults to:
        `{"low" : 0.01, "high" : 0.5, "size" : 1}`
    noise_f : function, optional
        Produces noise, must have signatures like `noise, prng = f(N, prng)`
    hrf_f : function, optional
        Returns a haemodynamic response, signature hrf_f(**hrf_params)
    hrf_params : dict
        Keyword parameters for hrf_f
    prng : None or RandomState object
        Allows for independent random draws, used for all
        random sampling

    Return
    ------
    Xs : list of 2D arrays (n_sample, n_features)
        One simulated feature matrix per metadata path
    ys : list
        The create_y() labels matching each entry of Xs
    yindices : list
        The `index` metadata column matching each entry of Xs

    Raises
    ------
    ValueError
        If the n_* counts do not sum to n_features
    KeyError
        If a condition in the metadata has no entry in cond_to_rt
    """
    # ----
    # Feature composition
    if n_noise is None:
        n_noise = 0
    if n_accumulator is None:
        n_accumulator = 0
    if n_decision is None:
        n_decision = 0
    if n_univariate is None:
        n_univariate = (n_features - n_noise - n_accumulator - n_decision)

    if (n_features - n_univariate - n_accumulator -
            n_noise - n_decision) != 0:
        raise ValueError("The number of features don't add up.")

    # Load wheelerdata
    metas = wheelerdata.get_RT_metadata_paths()

    # Get to work simulating
    Xs, ys, yindices = [], [], []
    for meta in metas:
        # Get data, preprocess too,
        data = csv_to_targets(meta)
        data = tr_pad_targets(data, trname, data[trname].shape[0],
                              pad=np.nan)

        if filtfile is not None:
            data = reprocess_targets(filtfile, data, np.nan,
                                     ("TR", "trialcount"))

        # Check cond_to_rt
        for c in unique_nan(data[cond]):
            try:
                cond_to_rt[c]
            except KeyError:
                raise KeyError("{0} not present in cond_to_rt".format(c))

        # use cond to create y
        y = create_y(data[cond])
        yindex = data[index]

        # make accumulator and decision traces
        if n_accumulator > 0:
            data["accumulator"] = _make_accumulator_array(
                    y, yindex, cond_to_rt, drift_noise, step_noise,
                    z_noise, drift_noise_param, step_noise_param,
                    z_noise_param, prng=prng)
        if n_decision > 0:
            data["decision"] = _make_decision_array(y, yindex, cond_to_rt)

        # Populate Xmeta
        boldsim = Reproduce(y, data, noise_f=noise_f, hrf_f=hrf_f,
                            hrf_params=hrf_params, TR=TR, prng=prng)
        boldsim.create_dm_from_y(convolve=False)

        n_sample_feature = boldsim.dm.shape[0]
        Xmeta = np.zeros((n_sample_feature, n_features))

        # 1. univariate features
        start = 0
        stop = n_univariate
        for j in range(start, stop):
            boldsim.create_bold(np.sum(boldsim.dm[:, 1:], axis=1),
                                convolve=True)
            Xmeta[:, j] = boldsim.bold

        # 2. accumulator features
        start = stop
        stop = start + n_accumulator
        for j in range(start, stop):
            boldsim.create_bold(data["accumulator"], convolve=True)
            Xmeta[:, j] = boldsim.bold

        # 3. decision features
        start = stop
        stop = start + n_decision
        for j in range(start, stop):
            boldsim.create_bold(data["decision"], convolve=True)
            Xmeta[:, j] = boldsim.bold

        # 4. noise features:
        start = stop
        stop = start + n_noise
        for j in range(start, stop):
            # Drop baseline from noise
            # NOTE(review): rand() is not given prng here -- confirm
            # whether these draws should come from prng as well.
            randbold = rand(boldsim.dm.shape[0])
            randbold[boldsim.y == 0] = 0.0
            boldsim.create_bold(randbold, convolve=True)
            Xmeta[:, j] = boldsim.bold

        Xs.append(Xmeta)
        ys.append(y)
        yindices.append(yindex)

    return Xs, ys, yindices