def tqdm(iterable, *args, enabled=None, **kwargs):
    if enabled is None:
        enabled = set_tqdm_enabled.value
    from tqdm import tqdm as tqdm_
    if enabled:
        return tqdm_(iterable, *args, **kwargs)
    return iterable
def tqdm(*args, mininterval=5.0, **kwargs):
    # Same interface as ``tqdm.tqdm``, but with a less chatty default
    # refresh interval (5 s instead of 0.1 s).
    from tqdm import tqdm as tqdm_
    return tqdm_(*args, mininterval=mininterval, **kwargs)
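# A minimal sketch of how the wrappers above can be exercised; this demo
# function is illustrative only and is not called anywhere in this module.
# Note that both helpers are named ``tqdm``, so whichever definition is
# imported (or defined) last shadows the other in a shared namespace.
def _demo_tqdm_wrapper():
    total = 0
    # The progress bar refreshes at most every ``mininterval`` seconds.
    for value in tqdm(range(10_000), desc='Summing'):
        total += value
    return total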
def get_contrasts(fmri_img: Union[str, PathLike, PosixPath, Nifti1Image],
                  events: Union[str, PathLike, PosixPath, pd.DataFrame],
                  desc: str = 'effect_size',
                  design_kws: Union[dict, Bunch] = None,
                  glm_kws: Union[dict, Bunch] = None,
                  masker_kws: Union[dict, Bunch] = None,
                  standardize: bool = True,
                  scale: bool = False,
                  scale_between: tuple = (0, 1),
                  maximize: bool = False,
                  masker: Union[MultiNiftiMasker, NiftiLabelsMasker,
                                NiftiMapsMasker, NiftiMasker] = None,
                  feature_labels: Union[Sequence, pd.Index] = None,
                  session=None,
                  **kwargs) -> Bunch:
    """
    Return a dict-like structure containing experimental contrasts.

    Using a ``nilearn.glm.first_level.FirstLevelModel`` object, contrasts
    are first computed trial-wise. Then, the same is done for each
    experimental condition in ``trial_type_cols`` if a list of strings
    is provided.

    Args:
        fmri_img: str, PathLike, PosixPath or Nifti1Image
            In-memory or path pointing to a ``nibabel.nifti1.Nifti1Image``.

        events: str, PathLike, PosixPath or DataFrame
            In-memory or path pointing to a ``pandas.DataFrame``.

        desc: str (Default = 'effect_size')
            String passed to the ``output_type`` parameter of
            ``nilearn.glm.first_level.FirstLevelModel.compute_contrast``.

        design_kws: dict or Bunch (Default = None)
            Dict-like mapping of keyword arguments passed to
            ``nilearn.glm.first_level.make_first_level_design_matrix``.
            If a ``session`` object is passed in the parameters, the
            value under the corresponding key is used.

        glm_kws: dict or Bunch (Default = None)
            Dict-like mapping of keyword arguments passed to
            ``nilearn.glm.first_level.FirstLevelModel.__init__``.
            If a ``session`` object is passed in the parameters, the
            value under the corresponding key is used.

        masker_kws: dict or Bunch (Default = None)
            Dict-like mapping of keyword arguments passed to
            ``masker.__init__``. If a ``session`` object is passed in
            the parameters, the value under the corresponding key is used.

        standardize: bool (Default = True)
            If true (by default), the extracted brain signals are
            standardized using a ``sklearn.preprocessing.StandardScaler``
            object (demeaning and scaling to unit variance). It is
            generally advised to standardize data for machine-learning
            operations. See notes for documentation, tutorials and more.

        scale: bool (Default = False)
            If true, the extracted brain signals are scaled (between 0
            and 1 by default) using a ``sklearn.preprocessing.MinMaxScaler``
            object. See notes for documentation, tutorials and more.

        scale_between: tuple (Default = (0, 1))
            Values between which the signal should be scaled.
            Default is (0, 1) - left = min, right = max.
            Only used if the ``scale`` parameter is True.

        maximize: bool (Default = False)
            If true, scale each feature by its maximum absolute value.
            From the docs of ``sklearn.preprocessing.MaxAbsScaler``:
            '[...] Scales and translates each feature individually such
            that the maximal absolute value of each feature in training
            set is 1.0. Does not shift/center the data, and thus does
            not destroy any sparsity.'

        masker: MultiNiftiMasker, NiftiLabelsMasker, NiftiMapsMasker
                or NiftiMasker (Default = None)
            Masker object from the ``nilearn.input_data`` module meant
            to perform brain-signal extraction (conversion from a 4D or
            3D image to 2D data). If omitted, a ``NiftiMasker`` with
            default parameters is used.

        feature_labels: List or pd.Index (Default = None)
            List of feature names used as columns for the brain-signal
            matrix. The number of labels and the number of features
            must match; an error is raised otherwise.
        session: dict or Bunch (Default = None)
            Dict-like structure containing all required and/or optional
            parameters. The functions ``fetch_fmriprep_session`` and
            ``get_fmri_session`` from ``cimaq_decoding_utils`` return a
            ``session`` object. It is similar to the return values of
            ``nilearn.datasets.fetch_{dataset_name}`` functions.

    Returns:
        ``sklearn.utils.Bunch``
            Dict-like structure with the following keys:
            ['model', 'contrast_img', 'signals', 'feature_labels']

    Notes:
        https://scikit-learn.org/stable/modules/preprocessing.html#preprocessing
    """

    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler, StandardScaler
    from tqdm import tqdm as tqdm_

    from cimaq_decoding_utils import get_frame_times, get_t_r

    # Parameter initialization
    design_defs, glm_defs = {}, {}
    # ``nimage`` is assumed to be ``nilearn.image`` (``nimage.concat_imgs``
    # is used below), so ``load_img`` is called on it directly.
    fmri_img = nimage.load_img(fmri_img)

    # Load the events as a DataFrame, inferring the separator from the
    # file extension (.tsv -> tab, otherwise comma).
    if not isinstance(events, pd.DataFrame):
        sep = '\t' if splitext(events)[1][1] == 't' else ','
        events = pd.read_csv(events, sep=sep)

    if session is not None:
        design_defs.update(session.design_defs)
        glm_defs.update(session.glm_defs)

    t_r, frame_times = get_t_r(fmri_img), get_frame_times(fmri_img)

    if design_kws is not None:
        design_defs.update(design_kws)
    if glm_kws is not None:
        glm_defs.update(glm_kws)

    # GLM initialization and contrast computation
    design = make_first_level_design_matrix(frame_times,
                                            events=events.iloc[1:, :],
                                            **design_defs)
    model = FirstLevelModel(**glm_defs).fit(run_imgs=fmri_img,
                                            design_matrices=design.iloc[:, 1:])
    # ``desc`` maps onto ``compute_contrast``'s ``output_type`` argument.
    contrasts = nimage.concat_imgs(
        [model.compute_contrast(trial, output_type=desc)
         for trial in tqdm_(design.columns[:-1].astype(str),
                            ncols=100, desc='Computing Contrasts')])

    # Brain-signal extraction
    pipe_components = ((standardize, 'standardize', StandardScaler()),
                       (maximize, 'maximize', MaxAbsScaler()),
                       (scale, 'scale', MinMaxScaler(scale_between)))
    pipe_components = [item[1:] for item in
                       filter(lambda x: x[0], pipe_components)]

    if masker is None:
        # Fall back to a plain NiftiMasker fitted on the contrast images
        # (see docstring); ``masker_kws`` is only applied when this
        # default masker is built.
        masker = NiftiMasker(**(masker_kws if masker_kws is not None
                                else {})).fit(contrasts)
    signals = masker.transform_single_imgs(contrasts)

    if pipe_components:
        pipeline = Pipeline(pipe_components)
        signals = pipeline.fit_transform(signals)

    signals = pd.DataFrame(signals, index=design.iloc[:, :-1].columns)
    if feature_labels is not None:
        signals = signals.set_axis(feature_labels, axis=1)

    return Bunch(model=model, contrast_img=contrasts, signals=signals,
                 feature_labels=feature_labels)
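# A minimal usage sketch for ``get_contrasts``; illustrative only and not
# called anywhere in this module. The file paths are placeholders and the
# keyword values are arbitrary examples, not recommended settings.
def _demo_get_contrasts():
    results = get_contrasts(
        fmri_img='sub-01_task-memory_bold.nii.gz',  # placeholder path
        events='sub-01_task-memory_events.tsv',     # placeholder path
        desc='effect_size',
        design_kws={'drift_model': 'cosine'},       # forwarded to make_first_level_design_matrix
        glm_kws={'smoothing_fwhm': 8},              # forwarded to FirstLevelModel
        standardize=True)
    # ``results`` is a Bunch holding the fitted GLM, the concatenated
    # trial-wise contrast image and the (trials x features) signal matrix.
    return results.model, results.contrast_img, results.signals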
def parse(self, process=None, batch=200, remove_after=False):
    """
    Write a series of compressed dict files to disk.

    Structure, for each page:
        - page_len : the xml length of the page
        - content : all the words, lower case, no punctuation
        - ns : namespace
        - redirect : if the page redirects, where to
        - meta : the metadata of the pages (infobox...)
    """
    with open(self.work_directory + BLOCKLIST_PATH) as f:
        blocklist = json.load(f)

    global worker

    def worker(t):
        pages, blocklist = t
        parser = PageParser()
        pages = pages.split('<page>')
        dic = {}
        redirect = {}
        n_skip = 0
        for page in pages:
            try:
                title = rx_title.search(page)[0]
                if not self.__is_ok(title, blocklist):
                    n_skip += 1
                    continue
            except TypeError:
                if len(page) != 2:  # real parsing errors
                    print(page)
                continue
            if '<redirect' not in page:
                # xml parse
                namespace = int(rx_nm.search(page)[0])
                text = page.split('<text', 1)[1].split('>', 1)[1].split('</text>')[0]
                # wikicode parse
                data = parser.parse(text)
                data['ns'] = namespace
                dic[title] = data
                dic[title]['page_len'] = len(page)
            else:
                to_page = page.split('<redirect title="', 1)[1].split('" />', 1)[0]
                redirect[title] = {'redirect': to_page}
                redirect[title]['page_len'] = len(page)
        return dic, redirect, n_skip

    print('[DumpParser] Start parsing...')
    g = self.generate_chunks()
    with mp.Pool(process) as p:
        bar = tqdm_(total=self.total)
        buff = {}
        redirect = {}
        params = ((i, blocklist) for i in g)
        i = 0
        n_buf, n_red = 0, 0
        for rep, red, n_skip in p.imap_unordered(worker, params):
            bar.update(len(rep) + len(red) + n_skip)
            buff.update(rep)
            redirect.update(red)
            # Periodically flush the page buffer once it grows past
            # ``batch`` MiB.
            if i % (batch // 2) == 0:
                size = dict_size(buff) / 1024**2
                if size > batch:
                    filename = 'pages_{}'.format(n_buf)
                    self.__write_to_disk(buff, filename)
                    buff.clear()
                    n_buf += 1
            if i % (batch * 2) == 0:
                size = dict_size(redirect) / 1024**2
                if size > batch:
                    filename = 'redirect_{}'.format(n_red)
                    self.__write_to_disk(redirect, filename)
                    redirect.clear()
                    n_red += 1
            i += 1

    # Flush whatever is left in the buffers.
    filename = 'pages_{}'.format(n_buf)
    self.__write_to_disk(buff, filename)
    filename = 'redirect_{}'.format(n_red)
    self.__write_to_disk(redirect, filename)

    if remove_after:
        os.remove(self.path_data)
        os.remove(self.path_index)
        print('[DumpParser] Files removed')

    self.thread.join()
    print('[DumpParser] Parsing to dictionary finished!')
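# A minimal usage sketch for ``parse``; illustrative only. ``DumpParser`` is
# assumed to be the class this method belongs to, and the constructor
# signature shown here (dump path, working directory) is hypothetical - it
# is not defined in this excerpt.
def _demo_parse():
    parser = DumpParser('enwiki-latest-pages-articles.xml.bz2',  # placeholder dump path
                        work_directory='wiki_parsed/')           # placeholder output dir
    # Use 4 worker processes, flush buffers past ~200 MiB, keep the dump files.
    parser.parse(process=4, batch=200, remove_after=False)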