import logging

import numpy as np
from mvpa2.datasets.eventrelated import find_events, eventrelated_dataset

logger = logging.getLogger(__name__)


def balance_dataset(ds, label, sort=True, **kwargs):
    """Balance event durations: renumber fixation volumes and trim every
    event to the minimum event duration found in the dataset."""

    ################ To be changed ######################
    m_fixation = ds.targets == 'fixation'
    # Collapse consecutive fixation event numbers into blocks of four
    # sharing a single event id.
    ev_fix = zip(ds.chunks[m_fixation],
                 4 * ((ds.sa.events_number[m_fixation] + 2) // 4 - 1) + 2)
    #####################################################
    ev_fix = np.array(list(ev_fix))
    ds.sa.events_number[m_fixation] = np.int_(ev_fix.T[1])

    arg_sort = np.argsort(ds.sa.events_number)
    events = find_events(chunks=ds[arg_sort].sa.chunks,
                         targets=ds[arg_sort].sa.targets)

    # The shortest event sets the common duration.
    min_duration = np.min([e['duration'] for e in events])

    mask = False
    for ev in np.unique(ds.sa.events_number):
        mask_event = ds.sa.events_number == ev
        # Keep only the first min_duration volumes of each event.
        mask_event[np.nonzero(mask_event)[0][min_duration - 1] + 1:] = False
        mask = mask + mask_event  # accumulate (boolean or)

    if sort:
        arg_sort = np.argsort(ds[mask].sa.events_number)
        ds = ds[mask][arg_sort]
    else:
        ds = ds[mask]

    ds.a.events = find_events(targets=ds.targets, chunks=ds.chunks)

    return ds
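# Worked example (standalone numpy sketch, illustrative values only) of the
# fixation renumbering formula used above: consecutive event numbers are
# collapsed into blocks of four that share a single event id.
def _demo_fixation_regrouping():
    n = np.arange(2, 14)
    regrouped = 4 * ((n + 2) // 4 - 1) + 2
    # n = 2..5 map to 2, 6..9 map to 6, 10..13 map to 10
    print(dict(zip(n.tolist(), regrouped.tolist())))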
def load_spatiotemporal_dataset(ds, **kwargs):
    """Build an event-related dataset, keeping only events at least
    `duration` volumes long and forcing a common duration on all events."""

    onset = 0

    for arg in kwargs:
        if arg == 'onset':
            onset = kwargs[arg]
        if arg == 'duration':
            duration = kwargs[arg]
        if arg == 'enable_results':
            enable_results = kwargs[arg]

    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    #task_events = [e for e in events if e['targets'] in ['Vipassana', 'Samatha']]

    if 'duration' in locals():
        events = [e for e in events if e['duration'] >= duration]
    else:
        duration = np.min([ev['duration'] for ev in events])

    for e in events:
        e['onset'] += onset
        e['duration'] = duration

    evds = eventrelated_dataset(ds, events=events)

    return evds
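# Minimal sketch (plain dicts, no pymvpa needed, values are illustrative) of
# the event normalisation performed above: drop events shorter than
# `duration`, shift onsets, and force a common duration.
def _demo_event_trimming():
    events = [{'onset': 0, 'duration': 6},
              {'onset': 6, 'duration': 3},
              {'onset': 9, 'duration': 6}]
    onset, duration = 1, 4
    events = [e for e in events if e['duration'] >= duration]
    for e in events:
        e['onset'] += onset
        e['duration'] = duration
    print(events)  # the 3-volume event is dropped, onsets shift by 1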
def build_events_ds(ds, new_duration, **kwargs):
    """
    Convert a dataset into an event-related dataset.

    Used for transfer learning and clustering: a classifier trained on an
    event-related dataset must make its predictions on the same kind of
    dataset.

    Parameters
    ----------
    ds : Dataset
        The dataset to be converted.
    new_duration : integer
        Duration of a single event. If the experiment's events have
        different lengths, only events of duration greater than or equal
        to new_duration are used.
    kwargs : dict
        win_number : number of windows to extract from a single event; if
            not set, it defaults to the ratio between the event duration
            and new_duration.
        overlap : number of overlapping volumes between consecutive
            windows.

    Returns
    -------
    Dataset
        The event-related dataset.
    """
    for arg in kwargs:
        if arg == 'win_number':
            win_number = kwargs[arg]
        if arg == 'overlap':
            overlap = kwargs[arg]

    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    labels = np.unique(ds.targets)

    current_duration = dict()
    for l in labels:
        d = [e['duration'] for e in events if e['targets'] == l]
        current_duration[l] = np.unique(d)[0]

    def calc_overlap(w, l, n):
        return w - np.floor((l - w) / (n - 1))

    def calc_win_number(w, l, o):
        return (l - w) / (w - o) + 1

    if 'overlap' not in locals():
        # Note: uses the duration of the last label iterated above.
        overlap = calc_overlap(new_duration, current_duration[l], win_number)
    else:
        if overlap >= new_duration:
            overlap = new_duration - 1

    if 'win_number' not in locals():
        #win_number = np.ceil(current_duration[l]/np.float(new_duration))
        win_number = calc_win_number(new_duration, current_duration[l], overlap)

    new_event_list = []
    for e in events:
        onset = e['onset']
        chunks = e['chunks']
        targets = e['targets']
        duration = e['duration']
        for i in np.arange(win_number):
            new_onset = onset + i * (new_duration - overlap)
            new_event = dict()
            new_event['onset'] = new_onset
            new_event['duration'] = new_duration
            new_event['targets'] = targets
            new_event['chunks'] = chunks
            new_event_list.append(new_event)

    logger.info('Building new event related dataset...')
    evds = eventrelated_dataset(ds, events=new_event_list)

    return evds
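# Worked example (assumed numbers) for the windowing formulas above: an
# event of 12 volumes cut into windows of new_duration = 4 with overlap = 2
# yields (12 - 4) / (4 - 2) + 1 = 5 windows, starting every
# new_duration - overlap = 2 volumes.
def _demo_window_arithmetic():
    new_duration, event_length, overlap = 4, 12, 2
    win_number = (event_length - new_duration) // (new_duration - overlap) + 1
    onsets = [i * (new_duration - overlap) for i in range(win_number)]
    print(win_number, onsets)  # 5 [0, 2, 4, 6, 8]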
def spatiotemporal(ds, **kwargs):
    """Run an event-related (spatiotemporal) cross-validated classification,
    optionally with a permutation test."""

    onset = 0
    permutations = 0

    for arg in kwargs:
        if arg == 'onset':
            onset = kwargs[arg]
        if arg == 'duration':
            duration = kwargs[arg]
        if arg == 'enable_results':
            enable_results = kwargs[arg]
        if arg == 'permutations':
            permutations = int(kwargs[arg])

    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)

    if 'duration' in locals():
        events = [e for e in events if e['duration'] >= duration]
    else:
        duration = np.min([ev['duration'] for ev in events])

    for e in events:
        e['onset'] += onset
        e['duration'] = duration

    evds = eventrelated_dataset(ds, events=events)

    [fclf, cvte] = setup_classifier(**kwargs)

    logger.info('Cross validation is performing ...')
    res = cvte(evds)

    print(cvte.ca.stats)

    if permutations != 0:
        print(cvte.ca.null_prob.samples)
        dist_len = len(cvte.null_dist.dists())
        err_arr = np.zeros(dist_len)
        for i in range(dist_len):
            err_arr[i] = 1 - cvte.ca.stats.stats['ACC']
        total_p_value = np.mean(cvte.null_dist.p(err_arr))
        p_value = cvte.ca.null_prob.samples
    else:
        total_p_value = 0.
        p_value = np.array([0, 0])

    try:
        sensana = fclf.get_sensitivity_analyzer()
        res_sens = sensana(evds)
    except Exception as err:
        # Some classifiers expose no sensitivity analyzer: fall back to
        # returning the results gathered so far, without sensitivity maps.
        allowed_keys = ['map', 'sensitivities', 'stats', 'mapper',
                        'classifier', 'ds', 'perm_pvalue', 'p']
        allowed_results = [None, None, cvte.ca.stats, evds.a.mapper,
                           fclf, evds, p_value, total_p_value]
        results_dict = dict(zip(allowed_keys, allowed_results))
        results = dict()
        if 'enable_results' not in locals():
            enable_results = allowed_keys[:]
        for elem in enable_results:
            if elem in allowed_keys:
                results[elem] = results_dict[elem]
        return results
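# Standalone sketch (numpy only, fabricated numbers) of the permutation
# p-value idea used above: the observed error 1 - ACC is compared against a
# null distribution of errors, and the p-value is the fraction of null
# errors at or below the observed one.
def _demo_permutation_pvalue():
    rng = np.random.RandomState(0)
    null_errors = rng.uniform(0.3, 0.7, size=100)  # fabricated null dist
    observed_error = 1 - 0.75                      # ACC = 0.75
    print(np.mean(null_errors <= observed_error))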
def preprocess_dataset(ds, type_, **kwargs):
    """
    Preprocess the dataset: detrending is performed per run and per chunk;
    z-scoring is likewise done by chunk and by run.

    Parameters
    ----------
    ds : Dataset
        The dataset to be preprocessed.
    type_ : string
        The experiment to be processed.
    kwargs : dict
        mean_samples - boolean : whether samples should be averaged.
        label_included - list : labels to be included in the dataset.
        label_dropped - string : label to be dropped (rest, fixation).

    Returns
    -------
    Dataset
        The processed dataset.
    """
    mean = False
    normalization = 'feature'
    label_included = ['all']
    label_dropped = 'None'
    img_dim = 0

    for arg in kwargs:
        if arg == 'mean_samples':
            mean = kwargs[arg]
        if arg == 'label_included':
            label_included = kwargs[arg].split(',')
        if arg == 'label_dropped':
            label_dropped = kwargs[arg]
        if arg == 'img_dim':
            img_dim = int(kwargs[arg])
        if arg == 'normalization':
            normalization = str(kwargs[arg])

    logger.info('Dataset preprocessing: Detrending...')
    if len(np.unique(ds.sa['file'])) != 1:
        poly_detrend(ds, polyord=1, chunks_attr='file')
    poly_detrend(ds, polyord=1, chunks_attr='chunks')

    if label_dropped != 'None':
        logger.info('Removing labels...')
        ds = ds[ds.sa.targets != label_dropped]
    if label_included != ['all']:
        ds = ds[np.array([l in label_included for l in ds.sa.targets],
                         dtype='bool')]

    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)  # , param_est=('targets', ['fixation']))

    if normalization == 'sample' or normalization == 'both':
        # Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds.samples, axis=1)[:, None]
        ds.samples /= np.std(ds.samples, axis=1)[:, None]
        ds.samples[np.isnan(ds.samples)] = 0

    ds.a.events = find_events(  # event=ds.sa.event_num,
                              chunks=ds.sa.chunks,
                              targets=ds.sa.targets)

    return ds
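# Numpy-only sketch of the sample-wise normalisation above: each row
# (volume) is centred and scaled independently; rows with zero variance
# divide 0 by 0, yield NaN, and are reset to 0 as in preprocess_dataset.
def _demo_samplewise_zscore():
    samples = np.array([[1., 2., 3.],
                        [5., 5., 5.]])   # second row has zero variance
    samples -= samples.mean(axis=1)[:, None]
    with np.errstate(invalid='ignore'):
        samples /= samples.std(axis=1)[:, None]
    samples[np.isnan(samples)] = 0
    print(samples)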
import os

import numpy as np
import mvpa2.suite as mvpa
from sklearn.linear_model import RidgeCV, BayesianRidge
from sklearn.preprocessing import StandardScaler
from sklearn.cross_decomposition import PLSRegression
from pandas import read_csv
from sklearn.externals import joblib

# rolling_window, feature_dict and ft_freq are presumably provided
# by encoding_helpers.
from encoding_helpers import *
from itertools import combinations

T3 = False

for subj in range(12, 13):
    subj_preprocessed_path = os.path.join('/home', 'mboos', 'SpeechEncoding',
                                          'PreProcessed',
                                          'subj%02dnpp.gzipped.hdf5' % subj)
    s1ds = mvpa.h5load(subj_preprocessed_path)
    events = mvpa.find_events(targets=s1ds.sa.targets, chunks=s1ds.sa.chunks)

    # Relabel as 'rest' the last volume of every run of four identical
    # non-rest targets.
    rvstr_TS = rolling_window(s1ds.sa['targets'][::-1].copy(), 4)
    s1ds.sa['targets'].value[
        (np.where(np.apply_along_axis(
            lambda x: len(np.unique(x)) == 1 and x[0] != 'rest',
            1, rvstr_TS)[::-1])[0] + 3)] = 'rest'
    labelsTS = s1ds.sa['targets'].value.copy()

    # <codecell>

    # Unroll audio features; cut the last 500 ms.
    featureTS = np.zeros((labelsTS.shape[0], 20 * ft_freq))
    featureTS[labelsTS != 'rest', :] = np.reshape(
        np.vstack([feature_dict[ev['targets']][:60, :]
                   for ev in events if ev['targets'] != 'rest']),
        (-1, ft_freq * 20))

    # <codecell>

    # Now lag the audio features.
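# Minimal numpy sketch (hypothetical helper, not from encoding_helpers) of
# the lagging step announced above: each time point is stacked with its
# predecessors so the encoding model sees a short stimulus history.
def _demo_lag_features(n_lags=3):
    ts = np.arange(10, dtype=float)[:, None]  # (time, 1 feature)
    lagged = np.hstack([np.roll(ts, lag, axis=0) for lag in range(n_lags)])
    lagged[:n_lags - 1] = 0                   # zero rows with wrapped values
    print(lagged.shape)                       # (10, 3)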
# Load the pymvpa dataset.
try:
    logger.info('Loading dataset...')
    ds = fmri_dataset(fmri_list, targets=attr.targets, chunks=attr.chunks,
                      mask=mask)
    logger.info('Dataset loaded...')
except ValueError as e:
    logger.error(subj + ' *** ERROR: ' + str(e))
    del fmri_list
    return 0

# Update Dataset attributes
#
# TODO: Evaluate if it is useful to build a dedicated function
ev_list = []
events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
for i in range(len(events)):
    duration = events[i]['duration']
    for j in range(duration):
        ev_list.append(i + 1)

ds.a['events'] = events            # Update event field
ds.sa['events_number'] = ev_list   # Update event number

# Name added to do leave-one-subject-out analysis
ds.sa['name'] = [subj for i in range(len(ds.sa.chunks))]

try:
    for k in attr.keys():
        ds.sa[k] = attr[k]
except BaseException as e:
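# Plain-Python sketch (toy durations) of the attribute expansion above:
# each event is unrolled into one entry per volume, so every sample carries
# the 1-based index of the event it belongs to.
def _demo_events_number():
    events = [{'duration': 3}, {'duration': 2}]
    ev_list = []
    for i, e in enumerate(events):
        ev_list.extend([i + 1] * e['duration'])
    print(ev_list)  # [1, 1, 1, 2, 2]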